linux-kernel - Re: [PATCH drm-next 05/14] drm/nouveau: new VM

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a214b28b-043c-a8bb-69da-b4d8216fce56@amd.com>
Date:   Fri, 27 Jan 2023 14:23:05 +0100
From:   Christian König <christian.koenig@....com>
To:     Danilo Krummrich <dakr@...hat.com>,
        Matthew Brost <matthew.brost@...el.com>
Cc:     daniel@...ll.ch, airlied@...hat.com, bskeggs@...hat.com,
        jason@...kstrand.net, tzimmermann@...e.de, mripard@...nel.org,
        corbet@....net, nouveau@...ts.freedesktop.org,
        linux-kernel@...r.kernel.org, dri-devel@...ts.freedesktop.org,
        linux-doc@...r.kernel.org
Subject: Re: [PATCH drm-next 05/14] drm/nouveau: new VM_BIND uapi interfaces



Am 27.01.23 um 14:12 schrieb Danilo Krummrich:
> On 1/27/23 08:55, Christian König wrote:
>> Am 27.01.23 um 02:26 schrieb Danilo Krummrich:
>>> On 1/27/23 02:05, Matthew Brost wrote:
>>>> On Wed, Jan 18, 2023 at 07:12:47AM +0100, Danilo Krummrich wrote:
>>>>> This commit provides the interfaces for the new UAPI motivated by the
>>>>> Vulkan API. It allows user mode drivers (UMDs) to:
>>>>>
>>>>> 1) Initialize a GPU virtual address (VA) space via the new
>>>>>     DRM_IOCTL_NOUVEAU_VM_INIT ioctl. UMDs can provide a kernel 
>>>>> reserved
>>>>>     VA area.
>>>>>
>>>>> 2) Bind and unbind GPU VA space mappings via the new
>>>>>     DRM_IOCTL_NOUVEAU_VM_BIND ioctl.
>>>>>
>>>>> 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.
>>>>>
>>>>> Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC support
>>>>> asynchronous processing with DRM syncobjs as synchronization 
>>>>> mechanism.
>>>>>
>>>>> The default DRM_IOCTL_NOUVEAU_VM_BIND is synchronous processing,
>>>>> DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.
>>>>>
>>>>> Co-authored-by: Dave Airlie <airlied@...hat.com>
>>>>> Signed-off-by: Danilo Krummrich <dakr@...hat.com>
>>>>> ---
>>>>>   Documentation/gpu/driver-uapi.rst |   8 ++
>>>>>   include/uapi/drm/nouveau_drm.h    | 216 
>>>>> ++++++++++++++++++++++++++++++
>>>>>   2 files changed, 224 insertions(+)
>>>>>
>>>>> diff --git a/Documentation/gpu/driver-uapi.rst 
>>>>> b/Documentation/gpu/driver-uapi.rst
>>>>> index 4411e6919a3d..9c7ca6e33a68 100644
>>>>> --- a/Documentation/gpu/driver-uapi.rst
>>>>> +++ b/Documentation/gpu/driver-uapi.rst
>>>>> @@ -6,3 +6,11 @@ drm/i915 uAPI
>>>>>   =============
>>>>>     .. kernel-doc:: include/uapi/drm/i915_drm.h
>>>>> +
>>>>> +drm/nouveau uAPI
>>>>> +================
>>>>> +
>>>>> +VM_BIND / EXEC uAPI
>>>>> +-------------------
>>>>> +
>>>>> +.. kernel-doc:: include/uapi/drm/nouveau_drm.h
>>>>> diff --git a/include/uapi/drm/nouveau_drm.h 
>>>>> b/include/uapi/drm/nouveau_drm.h
>>>>> index 853a327433d3..f6e7d40201d4 100644
>>>>> --- a/include/uapi/drm/nouveau_drm.h
>>>>> +++ b/include/uapi/drm/nouveau_drm.h
>>>>> @@ -126,6 +126,216 @@ struct drm_nouveau_gem_cpu_fini {
>>>>>       __u32 handle;
>>>>>   };
>>>>>   +/**
>>>>> + * struct drm_nouveau_sync - sync object
>>>>> + *
>>>>> + * This structure serves as synchronization mechanism for 
>>>>> (potentially)
>>>>> + * asynchronous operations such as EXEC or VM_BIND.
>>>>> + */
>>>>> +struct drm_nouveau_sync {
>>>>> +    /**
>>>>> +     * @flags: the flags for a sync object
>>>>> +     *
>>>>> +     * The first 8 bits are used to determine the type of the 
>>>>> sync object.
>>>>> +     */
>>>>> +    __u32 flags;
>>>>> +#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0
>>>>> +#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1
>>>>> +#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf
>>>>> +    /**
>>>>> +     * @handle: the handle of the sync object
>>>>> +     */
>>>>> +    __u32 handle;
>>>>> +    /**
>>>>> +     * @timeline_value:
>>>>> +     *
>>>>> +     * The timeline point of the sync object in case the syncobj 
>>>>> is of
>>>>> +     * type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ.
>>>>> +     */
>>>>> +    __u64 timeline_value;
>>>>> +};
>>>>> +
>>>>> +/**
>>>>> + * struct drm_nouveau_vm_init - GPU VA space init structure
>>>>> + *
>>>>> + * Used to initialize the GPU's VA space for a user client, 
>>>>> telling the kernel
>>>>> + * which portion of the VA space is managed by the UMD and kernel 
>>>>> respectively.
>>>>> + */
>>>>> +struct drm_nouveau_vm_init {
>>>>> +    /**
>>>>> +     * @unmanaged_addr: start address of the kernel managed VA 
>>>>> space region
>>>>> +     */
>>>>> +    __u64 unmanaged_addr;
>>>>> +    /**
>>>>> +     * @unmanaged_size: size of the kernel managed VA space 
>>>>> region in bytes
>>>>> +     */
>>>>> +    __u64 unmanaged_size;
>>>>> +};
>>>>> +
>>>>> +/**
>>>>> + * struct drm_nouveau_vm_bind_op - VM_BIND operation
>>>>> + *
>>>>> + * This structure represents a single VM_BIND operation. UMDs 
>>>>> should pass
>>>>> + * an array of this structure via struct drm_nouveau_vm_bind's 
>>>>> &op_ptr field.
>>>>> + */
>>>>> +struct drm_nouveau_vm_bind_op {
>>>>> +    /**
>>>>> +     * @op: the operation type
>>>>> +     */
>>>>> +    __u32 op;
>>>>> +/**
>>>>> + * @DRM_NOUVEAU_VM_BIND_OP_ALLOC:
>>>>> + *
>>>>> + * The alloc operation is used to reserve a VA space region 
>>>>> within the GPU's VA
>>>>> + * space. Optionally, the &DRM_NOUVEAU_VM_BIND_SPARSE flag can be 
>>>>> passed to
>>>>> + * instruct the kernel to create sparse mappings for the given 
>>>>> region.
>>>>> + */
>>>>> +#define DRM_NOUVEAU_VM_BIND_OP_ALLOC 0x0
>>>>
>>>> Do you really need this operation? We have no concept of this in Xe,
>>>> e.g. we can create a VM and the entire address space is managed 
>>>> exactly
>>>> the same.
>>>
>>> The idea for alloc/free is to let UMDs allocate a portion of the VA 
>>> space (which I call a region), basically the same thing Vulkan 
>>> represents with a VKBuffer.
>>
>> If that's mangled into the same component/interface then I can say 
>> from experience that this is a pretty bad idea. We have tried 
>> something similar with radeon and it turned out horrible.
>
> What was the exact constellation in radeon and which problems did 
> arise from it?
>
>>
>> What you want is one component for tracking the VA allocations 
>> (drm_mm based) and a different component/interface for tracking the 
>> VA mappings (probably rb tree based).
>
> That's what the GPUVA manager is doing. There are gpuva_regions which 
> correspond to VA allocations and gpuvas which represent the mappings. 
> Both are tracked separately (currently both with a separate drm_mm, 
> though). However, the GPUVA manager needs to take regions into account 
> when dealing with mappings to make sure the GPUVA manager doesn't 
> propose drivers to merge over region boundaries. Speaking from 
> userspace PoV, the kernel wouldn't merge mappings from different 
> VKBuffer objects even if they're virtually and physically contiguous.

That are two completely different things and shouldn't be handled in a 
single component.

We should probably talk about the design of the GPUVA manager once more 
when this should be applicable to all GPU drivers.

>
> For sparse residency the kernel also needs to know the region 
> boundaries to make sure that it keeps sparse mappings around.

What?

Regards,
Christian.

>
>>
>> amdgpu has even gotten so far that the VA allocations are tracked in 
>> libdrm in userspace
>>
>> Regards,
>> Christian.
>>
>>>
>>> It serves two purposes:
>>>
>>> 1. It gives the kernel (in particular the GPUVA manager) the bounds 
>>> in which it is allowed to merge mappings. E.g. when a user request 
>>> asks for a new mapping and we detect we could merge this mapping 
>>> with an existing one (used in another VKBuffer than the mapping 
>>> request came for) the driver is not allowed to change the page table 
>>> for the existing mapping we want to merge with (assuming that some 
>>> drivers would need to do this in order to merge), because the 
>>> existing mapping could already be in use and by re-mapping it we'd 
>>> potentially cause a fault on the GPU.
>>>
>>> 2. It is used for sparse residency in a way that such an allocated 
>>> VA space region can be flagged as sparse, such that the kernel 
>>> always keeps sparse mappings around for the parts of the region that 
>>> do not contain actual memory backed mappings.
>>>
>>> If for your driver merging is always OK, creating a single huge 
>>> region would do the trick I guess. Otherwise, we could also add an 
>>> option to the GPUVA manager (or a specific region, which could also 
>>> be a single huge one) within which it never merges.
>>>
>>>>
>>>> If this can be removed then the entire concept of regions in the GPUVA
>>>> can be removed too (drop struct drm_gpuva_region). I say this because
>>>> in Xe as I'm porting over to GPUVA the first thing I'm doing after
>>>> drm_gpuva_manager_init is calling drm_gpuva_region_insert on the 
>>>> entire
>>>> address space. To me this seems kinda useless but maybe I'm missing 
>>>> why
>>>> you need this for Nouveau.
>>>>
>>>> Matt
>>>>
>>>>> +/**
>>>>> + * @DRM_NOUVEAU_VM_BIND_OP_FREE: Free a reserved VA space region.
>>>>> + */
>>>>> +#define DRM_NOUVEAU_VM_BIND_OP_FREE 0x1
>>>>> +/**
>>>>> + * @DRM_NOUVEAU_VM_BIND_OP_MAP:
>>>>> + *
>>>>> + * Map a GEM object to the GPU's VA space. The mapping must be 
>>>>> fully enclosed by
>>>>> + * a previously allocated VA space region. If the region is 
>>>>> sparse, existing
>>>>> + * sparse mappings are overwritten.
>>>>> + */
>>>>> +#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x2
>>>>> +/**
>>>>> + * @DRM_NOUVEAU_VM_BIND_OP_UNMAP:
>>>>> + *
>>>>> + * Unmap an existing mapping in the GPU's VA space. If the region 
>>>>> the mapping
>>>>> + * is located in is a sparse region, new sparse mappings are 
>>>>> created where the
>>>>> + * unmapped (memory backed) mapping was mapped previously.
>>>>> + */
>>>>> +#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x3
>>>>> +    /**
>>>>> +     * @flags: the flags for a &drm_nouveau_vm_bind_op
>>>>> +     */
>>>>> +    __u32 flags;
>>>>> +/**
>>>>> + * @DRM_NOUVEAU_VM_BIND_SPARSE:
>>>>> + *
>>>>> + * Indicates that an allocated VA space region should be sparse.
>>>>> + */
>>>>> +#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8)
>>>>> +    /**
>>>>> +     * @handle: the handle of the DRM GEM object to map
>>>>> +     */
>>>>> +    __u32 handle;
>>>>> +    /**
>>>>> +     * @addr:
>>>>> +     *
>>>>> +     * the address the VA space region or (memory backed) mapping 
>>>>> should be mapped to
>>>>> +     */
>>>>> +    __u64 addr;
>>>>> +    /**
>>>>> +     * @bo_offset: the offset within the BO backing the mapping
>>>>> +     */
>>>>> +    __u64 bo_offset;
>>>>> +    /**
>>>>> +     * @range: the size of the requested mapping in bytes
>>>>> +     */
>>>>> +    __u64 range;
>>>>> +};
>>>>> +
>>>>> +/**
>>>>> + * struct drm_nouveau_vm_bind - structure for 
>>>>> DRM_IOCTL_NOUVEAU_VM_BIND
>>>>> + */
>>>>> +struct drm_nouveau_vm_bind {
>>>>> +    /**
>>>>> +     * @op_count: the number of &drm_nouveau_vm_bind_op
>>>>> +     */
>>>>> +    __u32 op_count;
>>>>> +    /**
>>>>> +     * @flags: the flags for a &drm_nouveau_vm_bind ioctl
>>>>> +     */
>>>>> +    __u32 flags;
>>>>> +/**
>>>>> + * @DRM_NOUVEAU_VM_BIND_RUN_ASYNC:
>>>>> + *
>>>>> + * Indicates that the given VM_BIND operation should be executed 
>>>>> asynchronously
>>>>> + * by the kernel.
>>>>> + *
>>>>> + * If this flag is not supplied the kernel executes the 
>>>>> associated operations
>>>>> + * synchronously and doesn't accept any &drm_nouveau_sync objects.
>>>>> + */
>>>>> +#define DRM_NOUVEAU_VM_BIND_RUN_ASYNC 0x1
>>>>> +    /**
>>>>> +     * @wait_count: the number of wait &drm_nouveau_syncs
>>>>> +     */
>>>>> +    __u32 wait_count;
>>>>> +    /**
>>>>> +     * @sig_count: the number of &drm_nouveau_syncs to signal 
>>>>> when finished
>>>>> +     */
>>>>> +    __u32 sig_count;
>>>>> +    /**
>>>>> +     * @wait_ptr: pointer to &drm_nouveau_syncs to wait for
>>>>> +     */
>>>>> +    __u64 wait_ptr;
>>>>> +    /**
>>>>> +     * @sig_ptr: pointer to &drm_nouveau_syncs to signal when 
>>>>> finished
>>>>> +     */
>>>>> +    __u64 sig_ptr;
>>>>> +    /**
>>>>> +     * @op_ptr: pointer to the &drm_nouveau_vm_bind_ops to execute
>>>>> +     */
>>>>> +    __u64 op_ptr;
>>>>> +};
>>>>> +
>>>>> +/**
>>>>> + * struct drm_nouveau_exec_push - EXEC push operation
>>>>> + *
>>>>> + * This structure represents a single EXEC push operation. UMDs 
>>>>> should pass an
>>>>> + * array of this structure via struct drm_nouveau_exec's 
>>>>> &push_ptr field.
>>>>> + */
>>>>> +struct drm_nouveau_exec_push {
>>>>> +    /**
>>>>> +     * @va: the virtual address of the push buffer mapping
>>>>> +     */
>>>>> +    __u64 va;
>>>>> +    /**
>>>>> +     * @va_len: the length of the push buffer mapping
>>>>> +     */
>>>>> +    __u64 va_len;
>>>>> +};
>>>>> +
>>>>> +/**
>>>>> + * struct drm_nouveau_exec - structure for DRM_IOCTL_NOUVEAU_EXEC
>>>>> + */
>>>>> +struct drm_nouveau_exec {
>>>>> +    /**
>>>>> +     * @channel: the channel to execute the push buffer in
>>>>> +     */
>>>>> +    __u32 channel;
>>>>> +    /**
>>>>> +     * @push_count: the number of &drm_nouveau_exec_push ops
>>>>> +     */
>>>>> +    __u32 push_count;
>>>>> +    /**
>>>>> +     * @wait_count: the number of wait &drm_nouveau_syncs
>>>>> +     */
>>>>> +    __u32 wait_count;
>>>>> +    /**
>>>>> +     * @sig_count: the number of &drm_nouveau_syncs to signal 
>>>>> when finished
>>>>> +     */
>>>>> +    __u32 sig_count;
>>>>> +    /**
>>>>> +     * @wait_ptr: pointer to &drm_nouveau_syncs to wait for
>>>>> +     */
>>>>> +    __u64 wait_ptr;
>>>>> +    /**
>>>>> +     * @sig_ptr: pointer to &drm_nouveau_syncs to signal when 
>>>>> finished
>>>>> +     */
>>>>> +    __u64 sig_ptr;
>>>>> +    /**
>>>>> +     * @push_ptr: pointer to &drm_nouveau_exec_push ops
>>>>> +     */
>>>>> +    __u64 push_ptr;
>>>>> +};
>>>>> +
>>>>>   #define DRM_NOUVEAU_GETPARAM           0x00 /* deprecated */
>>>>>   #define DRM_NOUVEAU_SETPARAM           0x01 /* deprecated */
>>>>>   #define DRM_NOUVEAU_CHANNEL_ALLOC      0x02 /* deprecated */
>>>>> @@ -136,6 +346,9 @@ struct drm_nouveau_gem_cpu_fini {
>>>>>   #define DRM_NOUVEAU_NVIF               0x07
>>>>>   #define DRM_NOUVEAU_SVM_INIT           0x08
>>>>>   #define DRM_NOUVEAU_SVM_BIND           0x09
>>>>> +#define DRM_NOUVEAU_VM_INIT            0x10
>>>>> +#define DRM_NOUVEAU_VM_BIND            0x11
>>>>> +#define DRM_NOUVEAU_EXEC               0x12
>>>>>   #define DRM_NOUVEAU_GEM_NEW            0x40
>>>>>   #define DRM_NOUVEAU_GEM_PUSHBUF        0x41
>>>>>   #define DRM_NOUVEAU_GEM_CPU_PREP       0x42
>>>>> @@ -197,6 +410,9 @@ struct drm_nouveau_svm_bind {
>>>>>   #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI       DRM_IOW 
>>>>> (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct 
>>>>> drm_nouveau_gem_cpu_fini)
>>>>>   #define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + 
>>>>> DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info)
>>>>>   +#define DRM_IOCTL_NOUVEAU_VM_INIT DRM_IOWR(DRM_COMMAND_BASE + 
>>>>> DRM_NOUVEAU_VM_INIT, struct drm_nouveau_vm_init)
>>>>> +#define DRM_IOCTL_NOUVEAU_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + 
>>>>> DRM_NOUVEAU_VM_BIND, struct drm_nouveau_vm_bind)
>>>>> +#define DRM_IOCTL_NOUVEAU_EXEC DRM_IOWR(DRM_COMMAND_BASE + 
>>>>> DRM_NOUVEAU_EXEC, struct drm_nouveau_exec)
>>>>>   #if defined(__cplusplus)
>>>>>   }
>>>>>   #endif
>>>>> -- 
>>>>> 2.39.0
>>>>>
>>>>
>>>
>>
>