lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <f3d362bc-4ebf-3c36-6b10-06054e5e33dc@linux.intel.com>
Date: Thu, 7 Mar 2024 15:03:07 +0200 (EET)
From: Ilpo Järvinen <ilpo.jarvinen@...ux.intel.com>
To: Kane Chen <kane.chen@...el.com>
cc: LKML <linux-kernel@...r.kernel.org>, platform-driver-x86@...r.kernel.org, 
    david.e.box@...el.com, kane.chen@...el.corp-partner.google.com
Subject: Re: [PATCH 1/1] platform/x86/intel/pmc: Improve PKGC residency
 counters debug

On Thu, 7 Mar 2024, Kane Chen wrote:

> The current code only prints PKGC-10 residency when the PKGC-10
> is not reached in previous 'freeze' attempt. To debug PKGC-10 issues, we
> also need to know other PKGC residency counters to better triage issues.
> Ex:
> 1. When system is stuck in PC2, it can be caused short LTR from device.
> 2. When system is stuck in PC8, it can be caused by display engine.
> 
> To better triage issues, all PKGC residency are needed when issues happens.

happen.

> Example log:
>  CPU did not enter Package C10!!! (Package C10 cnt=0x0)
>  Prev Package C2 cnt = 0x2191a325de, Current Package C2 cnt = 0x21aba30724
>  Prev Package C3 cnt = 0x0, Current Package C3 cnt = 0x0
>  Prev Package C6 cnt = 0x0, Current Package C6 cnt = 0x0
>  Prev Package C7 cnt = 0x0, Current Package C7 cnt = 0x0
>  Prev Package C8 cnt = 0x0, Current Package C8 cnt = 0x0
>  Prev Package C9 cnt = 0x0, Current Package C9 cnt = 0x0
>  Prev Package C10 cnt = 0x0, Current Package C10 cnt = 0x0
> 
> With this log, we can know whether it's a stuck PC2 issue, and we can
> check whether the short LTR from device causes the issue.
> 
> Signed-off-by: Kane Chen <kane.chen@...el.com>
> ---
>  drivers/platform/x86/intel/pmc/core.c | 47 ++++++++++++++++++++-------
>  drivers/platform/x86/intel/pmc/core.h |  7 ++--
>  2 files changed, 41 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
> index 8f9c036809c79..b8910b671667e 100644
> --- a/drivers/platform/x86/intel/pmc/core.c
> +++ b/drivers/platform/x86/intel/pmc/core.c
> @@ -1389,6 +1389,15 @@ static int pmc_core_probe(struct platform_device *pdev)
>  		return -ENOMEM;
>  	pmcdev->pmcs[PMC_IDX_MAIN] = primary_pmc;
>  
> +	/* The last element in msr_map is empty */
> +	pmcdev->num_of_pkgc = ARRAY_SIZE(msr_map) - 1;
> +	pmcdev->pkgc_res_cnt = devm_kcalloc(&pdev->dev,
> +					    pmcdev->num_of_pkgc,
> +					    sizeof(*pmcdev->pkgc_res_cnt),
> +					    GFP_KERNEL);
> +	if (!pmcdev->pkgc_res_cnt)
> +		return -ENOMEM;
> +
>  	/*
>  	 * Coffee Lake has CPU ID of Kaby Lake and Cannon Lake PCH. So here
>  	 * Sunrisepoint PCH regmap can't be used. Use Cannon Lake PCH regmap
> @@ -1432,6 +1441,7 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
>  {
>  	struct pmc_dev *pmcdev = dev_get_drvdata(dev);
>  	struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN];
> +	int i;

unsigned int.

>  	if (pmcdev->suspend)
>  		pmcdev->suspend(pmcdev);
> @@ -1440,9 +1450,11 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
>  	if (pm_suspend_via_firmware())
>  		return 0;
>  
> -	/* Save PC10 residency for checking later */
> -	if (rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter))
> -		return -EIO;
> +	/* Save PKGC residency for checking later */
> +	for (i = 0; i < pmcdev->num_of_pkgc; i++) {
> +		if (rdmsrl_safe(msr_map[i].bit_mask, &pmcdev->pkgc_res_cnt[i]))
> +			return -EIO;
> +	}
>  
>  	/* Save S0ix residency for checking later */
>  	if (pmc_core_dev_state_get(pmc, &pmcdev->s0ix_counter))
> @@ -1451,14 +1463,15 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
>  	return 0;
>  }
>  
> -static inline bool pmc_core_is_pc10_failed(struct pmc_dev *pmcdev)
> +static inline bool pmc_core_is_deepest_pkgc_failed(struct pmc_dev *pmcdev)
>  {
> -	u64 pc10_counter;
> +	u32 deepest_pkgc_msr = msr_map[pmcdev->num_of_pkgc - 1].bit_mask;
> +	u64 deepest_pkgc_residency;
>  
> -	if (rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter))
> +	if (rdmsrl_safe(deepest_pkgc_msr, &deepest_pkgc_residency))
>  		return false;
>  
> -	if (pc10_counter == pmcdev->pc10_counter)
> +	if (deepest_pkgc_residency == pmcdev->pkgc_res_cnt[pmcdev->num_of_pkgc - 1])
>  		return true;
>  
>  	return false;
> @@ -1497,10 +1510,22 @@ int pmc_core_resume_common(struct pmc_dev *pmcdev)
>  	if (!warn_on_s0ix_failures)
>  		return 0;
>  
> -	if (pmc_core_is_pc10_failed(pmcdev)) {
> -		/* S0ix failed because of PC10 entry failure */
> -		dev_info(dev, "CPU did not enter PC10!!! (PC10 cnt=0x%llx)\n",
> -			 pmcdev->pc10_counter);
> +	if (pmc_core_is_deepest_pkgc_failed(pmcdev)) {
> +		/* S0ix failed because of deepest PKGC entry failure */
> +		dev_info(dev, "CPU did not enter %s!!! (%s cnt=0x%llx)\n",
> +			 msr_map[pmcdev->num_of_pkgc - 1].name,
> +			 msr_map[pmcdev->num_of_pkgc - 1].name,
> +			 pmcdev->pkgc_res_cnt[pmcdev->num_of_pkgc - 1]);
> +
> +		for (i = 0; i < pmcdev->num_of_pkgc; i++) {
> +			u64 pc_cnt;
> +
> +			if (!rdmsrl_safe(msr_map[i].bit_mask, &pc_cnt)) {
> +				dev_info(dev, "Prev %s cnt = 0x%llx, Current %s cnt = 0x%llx\n",
> +					 msr_map[i].name, pmcdev->pkgc_res_cnt[i],
> +					 msr_map[i].name, pc_cnt);
> +			}
> +		}
>  		return 0;
>  	}
>  
> diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h
> index 54137faaae2b2..fd7ae76984ac7 100644
> --- a/drivers/platform/x86/intel/pmc/core.h
> +++ b/drivers/platform/x86/intel/pmc/core.h
> @@ -385,7 +385,8 @@ struct pmc {
>   * @pmc_xram_read_bit:	flag to indicate whether PMC XRAM shadow registers
>   *			used to read MPHY PG and PLL status are available
>   * @mutex_lock:		mutex to complete one transcation
> - * @pc10_counter:	PC10 residency counter
> + * @pkgc_res_cnt:	PKGC residency counter

Array of PKGC residency counters


With those addressed, feel free to add:

Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@...ux.intel.com>


-- 
 i.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ