lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <202601042317.3h1kg7NS-lkp@intel.com>
Date: Sun, 4 Jan 2026 23:41:04 +0100
From: kernel test robot <lkp@...el.com>
To: Haoxiang Li <lihaoxiang@...c.iscas.ac.cn>, Felix.Kuehling@....com,
	alexander.deucher@....com, christian.koenig@....com,
	airlied@...il.com, simona@...ll.ch
Cc: oe-kbuild-all@...ts.linux.dev, amd-gfx@...ts.freedesktop.org,
	dri-devel@...ts.freedesktop.org, linux-kernel@...r.kernel.org,
	Haoxiang Li <lihaoxiang@...c.iscas.ac.cn>
Subject: Re: [PATCH] drm/amdkfd: fix a memory leak in
 device_queue_manager_init()

Hi Haoxiang,

kernel test robot noticed the following build errors:

[auto build test ERROR on drm-misc/drm-misc-next]
[also build test ERROR on linus/master v6.19-rc3 next-20251219]
[If your patch was applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Haoxiang-Li/drm-amdkfd-fix-a-memory-leak-in-device_queue_manager_init/20260104-211645
base:   https://gitlab.freedesktop.org/drm/misc/kernel.git drm-misc-next
patch link:    https://lore.kernel.org/r/20260104131532.3978895-1-lihaoxiang%40isrc.iscas.ac.cn
patch subject: [PATCH] drm/amdkfd: fix a memory leak in device_queue_manager_init()
config: x86_64-rhel-9.4-ltp (https://download.01.org/0day-ci/archive/20260104/202601042317.3h1kg7NS-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260104/202601042317.3h1kg7NS-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags:
| Reported-by: kernel test robot <lkp@...el.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601042317.3h1kg7NS-lkp@intel.com/

All errors/warnings (new ones prefixed by >>):

   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c: In function 'device_queue_manager_init':
>> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c:3046:17: error: implicit declaration of function 'deallocate_hiq_sdma_mqd'; did you mean 'allocate_hiq_sdma_mqd'? [-Wimplicit-function-declaration]
    3046 |                 deallocate_hiq_sdma_mqd(dev, &dqm->hiq_sdma_mqd);
         |                 ^~~~~~~~~~~~~~~~~~~~~~~
         |                 allocate_hiq_sdma_mqd
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c: At top level:
>> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c:3053:13: warning: conflicting types for 'deallocate_hiq_sdma_mqd'; have 'void(struct kfd_node *, struct kfd_mem_obj *)'
    3053 | static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
         |             ^~~~~~~~~~~~~~~~~~~~~~~
>> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c:3053:13: error: static declaration of 'deallocate_hiq_sdma_mqd' follows non-static declaration
   drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c:3046:17: note: previous implicit declaration of 'deallocate_hiq_sdma_mqd' with type 'void(struct kfd_node *, struct kfd_mem_obj *)'
    3046 |                 deallocate_hiq_sdma_mqd(dev, &dqm->hiq_sdma_mqd);
         |                 ^~~~~~~~~~~~~~~~~~~~~~~


vim +3046 drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c

  2921	
  2922	struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
  2923	{
  2924		struct device_queue_manager *dqm;
  2925	
  2926		pr_debug("Loading device queue manager\n");
  2927	
  2928		dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
  2929		if (!dqm)
  2930			return NULL;
  2931	
  2932		switch (dev->adev->asic_type) {
  2933		/* HWS is not available on Hawaii. */
  2934		case CHIP_HAWAII:
  2935		/* HWS depends on CWSR for timely dequeue. CWSR is not
  2936		 * available on Tonga.
  2937		 *
  2938		 * FIXME: This argument also applies to Kaveri.
  2939		 */
  2940		case CHIP_TONGA:
  2941			dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
  2942			break;
  2943		default:
  2944			dqm->sched_policy = sched_policy;
  2945			break;
  2946		}
  2947	
  2948		dqm->dev = dev;
  2949		switch (dqm->sched_policy) {
  2950		case KFD_SCHED_POLICY_HWS:
  2951		case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
  2952			/* initialize dqm for cp scheduling */
  2953			dqm->ops.create_queue = create_queue_cpsch;
  2954			dqm->ops.initialize = initialize_cpsch;
  2955			dqm->ops.start = start_cpsch;
  2956			dqm->ops.stop = stop_cpsch;
  2957			dqm->ops.halt = halt_cpsch;
  2958			dqm->ops.unhalt = unhalt_cpsch;
  2959			dqm->ops.destroy_queue = destroy_queue_cpsch;
  2960			dqm->ops.update_queue = update_queue;
  2961			dqm->ops.register_process = register_process;
  2962			dqm->ops.unregister_process = unregister_process;
  2963			dqm->ops.uninitialize = uninitialize;
  2964			dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
  2965			dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
  2966			dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
  2967			dqm->ops.process_termination = process_termination_cpsch;
  2968			dqm->ops.evict_process_queues = evict_process_queues_cpsch;
  2969			dqm->ops.restore_process_queues = restore_process_queues_cpsch;
  2970			dqm->ops.get_wave_state = get_wave_state;
  2971			dqm->ops.reset_queues = reset_queues_cpsch;
  2972			dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
  2973			dqm->ops.checkpoint_mqd = checkpoint_mqd;
  2974			break;
  2975		case KFD_SCHED_POLICY_NO_HWS:
  2976			/* initialize dqm for no cp scheduling */
  2977			dqm->ops.start = start_nocpsch;
  2978			dqm->ops.stop = stop_nocpsch;
  2979			dqm->ops.create_queue = create_queue_nocpsch;
  2980			dqm->ops.destroy_queue = destroy_queue_nocpsch;
  2981			dqm->ops.update_queue = update_queue;
  2982			dqm->ops.register_process = register_process;
  2983			dqm->ops.unregister_process = unregister_process;
  2984			dqm->ops.initialize = initialize_nocpsch;
  2985			dqm->ops.uninitialize = uninitialize;
  2986			dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
  2987			dqm->ops.process_termination = process_termination_nocpsch;
  2988			dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
  2989			dqm->ops.restore_process_queues =
  2990				restore_process_queues_nocpsch;
  2991			dqm->ops.get_wave_state = get_wave_state;
  2992			dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
  2993			dqm->ops.checkpoint_mqd = checkpoint_mqd;
  2994			break;
  2995		default:
  2996			dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy);
  2997			goto out_free;
  2998		}
  2999	
  3000		switch (dev->adev->asic_type) {
  3001		case CHIP_KAVERI:
  3002		case CHIP_HAWAII:
  3003			device_queue_manager_init_cik(&dqm->asic_ops);
  3004			break;
  3005	
  3006		case CHIP_CARRIZO:
  3007		case CHIP_TONGA:
  3008		case CHIP_FIJI:
  3009		case CHIP_POLARIS10:
  3010		case CHIP_POLARIS11:
  3011		case CHIP_POLARIS12:
  3012		case CHIP_VEGAM:
  3013			device_queue_manager_init_vi(&dqm->asic_ops);
  3014			break;
  3015	
  3016		default:
  3017			if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0))
  3018				device_queue_manager_init_v12(&dqm->asic_ops);
  3019			else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
  3020				device_queue_manager_init_v11(&dqm->asic_ops);
  3021			else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
  3022				device_queue_manager_init_v10(&dqm->asic_ops);
  3023			else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
  3024				device_queue_manager_init_v9(&dqm->asic_ops);
  3025			else {
  3026				WARN(1, "Unexpected ASIC family %u",
  3027				     dev->adev->asic_type);
  3028				goto out_free;
  3029			}
  3030		}
  3031	
  3032		if (init_mqd_managers(dqm))
  3033			goto out_free;
  3034	
  3035		if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
  3036			dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n");
  3037			goto out_free;
  3038		}
  3039	
  3040		if (!dqm->ops.initialize(dqm)) {
  3041			init_waitqueue_head(&dqm->destroy_wait);
  3042			return dqm;
  3043		}
  3044	
  3045		if (!dev->kfd->shared_resources.enable_mes)
> 3046			deallocate_hiq_sdma_mqd(dev, &dqm->hiq_sdma_mqd);
  3047	
  3048	out_free:
  3049		kfree(dqm);
  3050		return NULL;
  3051	}
  3052	
> 3053	static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
  3054					    struct kfd_mem_obj *mqd)
  3055	{
  3056		WARN(!mqd, "No hiq sdma mqd trunk to free");
  3057	
  3058		amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem);
  3059	}
  3060	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ