linux-kernel - Re: [PATCH 3/6] drm/amdgpu: Rework coredump to use memory dynamically

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3764d627-d632-5754-0bcc-a150c157d9f9@amd.com>
Date:   Wed, 12 Jul 2023 12:39:47 +0200
From:   Christian König <christian.koenig@....com>
To:     Lucas Stach <l.stach@...gutronix.de>,
        André Almeida <andrealmeid@...lia.com>,
        dri-devel@...ts.freedesktop.org, amd-gfx@...ts.freedesktop.org,
        linux-kernel@...r.kernel.org
Cc:     pierre-eric.pelloux-prayer@....com,
        'Marek Olšák' <maraeo@...il.com>,
        Timur Kristóf <timur.kristof@...il.com>,
        michel.daenzer@...lbox.org,
        Samuel Pitoiset <samuel.pitoiset@...il.com>,
        kernel-dev@...lia.com, alexander.deucher@....com
Subject: Re: [PATCH 3/6] drm/amdgpu: Rework coredump to use memory dynamically

Am 12.07.23 um 10:59 schrieb Lucas Stach:
> Am Mittwoch, dem 12.07.2023 um 10:37 +0200 schrieb Christian König:
>> Am 11.07.23 um 23:34 schrieb André Almeida:
>>> Instead of storing coredump information inside amdgpu_device struct,
>>> move it to a proper separate struct and allocate it dynamically. This
>>> will make it easier to further expand the logged information.
>> Very big NAK to this. The problem is that memory allocation isn't
>> allowed during a GPU reset.
>>
>> What you could do is to allocate the memory with GFP_ATOMIC or similar,
>> but for a large structure that might not be possible.
>>
> I'm still not fully clear on what the rules are here. In etnaviv we do
> devcoredump allocation in the GPU reset path with __GFP_NOWARN |
> __GFP_NORETRY, which means the allocation will kick memory reclaim if
> necessary, but will just give up if no memory could be made available
> easily. This satisfies the forward progress guarantee in the absence of
> successful memory allocation, which is the most important property in
> this path, I think.
>
> However, I'm not sure if the reclaim could lead to locking issues or
> something like that with the more complex use-cases with MMU notifiers
> and stuff like that. Christian, do you have any experience or
> information that would be good to share in this regard?

Yeah, very good question.

__GFP_NORETRY isn't sufficient as far as I know. Reclaim must be 
completely suppressed to be able to allocate in a GPU reset handler.

Daniel added lockdep annotations to some of the dma-fence signaling paths 
and this yielded quite a bunch of potential deadlocks.

It's not even that reclaim itself waits for dma_fences (that can happen, 
but is quite unlikely), but rather that reclaim needs locks and under 
those locks we then wait for dma_fences.

We should probably add a define somewhere which documents that 
(GFP_ATOMIC | __GFP_NOWARN) should be used in the GPU reset handlers when 
allocating memory for coredumps.

Regards,
Christian.

>
> Regards,
> Lucas
>
>> Regards,
>> Christian.
>>
>>> Signed-off-by: André Almeida <andrealmeid@...lia.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 14 +++--
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 65 ++++++++++++++--------
>>>    2 files changed, 51 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index dbe062a087c5..e1cc83a89d46 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1068,11 +1068,6 @@ struct amdgpu_device {
>>>    	uint32_t                        *reset_dump_reg_list;
>>>    	uint32_t			*reset_dump_reg_value;
>>>    	int                             num_regs;
>>> -#ifdef CONFIG_DEV_COREDUMP
>>> -	struct amdgpu_task_info         reset_task_info;
>>> -	bool                            reset_vram_lost;
>>> -	struct timespec64               reset_time;
>>> -#endif
>>>    
>>>    	bool                            scpm_enabled;
>>>    	uint32_t                        scpm_status;
>>> @@ -1085,6 +1080,15 @@ struct amdgpu_device {
>>>    	uint32_t			aid_mask;
>>>    };
>>>    
>>> +#ifdef CONFIG_DEV_COREDUMP
>>> +struct amdgpu_coredump_info {
>>> +	struct amdgpu_device		*adev;
>>> +	struct amdgpu_task_info         reset_task_info;
>>> +	struct timespec64               reset_time;
>>> +	bool                            reset_vram_lost;
>>> +};
>>> +#endif
>>> +
>>>    static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
>>>    {
>>>    	return container_of(ddev, struct amdgpu_device, ddev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index e25f085ee886..23b9784e9787 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -4963,12 +4963,17 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
>>>    	return 0;
>>>    }
>>>    
>>> -#ifdef CONFIG_DEV_COREDUMP
>>> +#ifndef CONFIG_DEV_COREDUMP
>>> +static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
>>> +			    struct amdgpu_reset_context *reset_context)
>>> +{
>>> +}
>>> +#else
>>>    static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
>>>    		size_t count, void *data, size_t datalen)
>>>    {
>>>    	struct drm_printer p;
>>> -	struct amdgpu_device *adev = data;
>>> +	struct amdgpu_coredump_info *coredump = data;
>>>    	struct drm_print_iterator iter;
>>>    	int i;
>>>    
>>> @@ -4982,21 +4987,21 @@ static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
>>>    	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
>>>    	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
>>>    	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
>>> -	drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
>>> -	if (adev->reset_task_info.pid)
>>> +	drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec);
>>> +	if (coredump->reset_task_info.pid)
>>>    		drm_printf(&p, "process_name: %s PID: %d\n",
>>> -			   adev->reset_task_info.process_name,
>>> -			   adev->reset_task_info.pid);
>>> +			   coredump->reset_task_info.process_name,
>>> +			   coredump->reset_task_info.pid);
>>>    
>>> -	if (adev->reset_vram_lost)
>>> +	if (coredump->reset_vram_lost)
>>>    		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
>>> -	if (adev->num_regs) {
>>> +	if (coredump->adev->num_regs) {
>>>    		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
>>>    
>>> -		for (i = 0; i < adev->num_regs; i++)
>>> +		for (i = 0; i < coredump->adev->num_regs; i++)
>>>    			drm_printf(&p, "0x%08x: 0x%08x\n",
>>> -				   adev->reset_dump_reg_list[i],
>>> -				   adev->reset_dump_reg_value[i]);
>>> +				   coredump->adev->reset_dump_reg_list[i],
>>> +				   coredump->adev->reset_dump_reg_value[i]);
>>>    	}
>>>    
>>>    	return count - iter.remain;
>>> @@ -5004,14 +5009,34 @@ static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
>>>    
>>>    static void amdgpu_devcoredump_free(void *data)
>>>    {
>>> +	kfree(data);
>>>    }
>>>    
>>> -static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
>>> +static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
>>> +			    struct amdgpu_reset_context *reset_context)
>>>    {
>>> +	struct amdgpu_coredump_info *coredump;
>>>    	struct drm_device *dev = adev_to_drm(adev);
>>>    
>>> -	ktime_get_ts64(&adev->reset_time);
>>> -	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
>>> +	coredump = kmalloc(sizeof(*coredump), GFP_KERNEL);
>>> +
>>> +	if (!coredump) {
>>> +		DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
>>> +		return;
>>> +	}
>>> +
>>> +	memset(coredump, 0, sizeof(*coredump));
>>> +
>>> +	coredump->reset_vram_lost = vram_lost;
>>> +
>>> +	if (reset_context->job && reset_context->job->vm)
>>> +		coredump->reset_task_info = reset_context->job->vm->task_info;
>>> +
>>> +	coredump->adev = adev;
>>> +
>>> +	ktime_get_ts64(&coredump->reset_time);
>>> +
>>> +	dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
>>>    		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
>>>    }
>>>    #endif
>>> @@ -5119,15 +5144,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
>>>    					goto out;
>>>    
>>>    				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
>>> -#ifdef CONFIG_DEV_COREDUMP
>>> -				tmp_adev->reset_vram_lost = vram_lost;
>>> -				memset(&tmp_adev->reset_task_info, 0,
>>> -						sizeof(tmp_adev->reset_task_info));
>>> -				if (reset_context->job && reset_context->job->vm)
>>> -					tmp_adev->reset_task_info =
>>> -						reset_context->job->vm->task_info;
>>> -				amdgpu_reset_capture_coredumpm(tmp_adev);
>>> -#endif
>>> +
>>> +				amdgpu_coredump(tmp_adev, vram_lost, reset_context);
>>> +
>>>    				if (vram_lost) {
>>>    					DRM_INFO("VRAM is lost due to GPU reset!\n");
>>>    					amdgpu_inc_vram_lost(tmp_adev);