lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aG1sqKBJSfHydDsx@agluck-desk3>
Date: Tue, 8 Jul 2025 12:08:24 -0700
From: "Luck, Tony" <tony.luck@...el.com>
To: Reinette Chatre <reinette.chatre@...el.com>
Cc: Fenghua Yu <fenghuay@...dia.com>,
	Maciej Wieczor-Retman <maciej.wieczor-retman@...el.com>,
	Peter Newman <peternewman@...gle.com>,
	James Morse <james.morse@....com>, Babu Moger <babu.moger@....com>,
	Drew Fustini <dfustini@...libre.com>,
	Dave Martin <Dave.Martin@....com>,
	Anil Keshavamurthy <anil.s.keshavamurthy@...el.com>,
	Chen Yu <yu.c.chen@...el.com>, x86@...nel.org,
	linux-kernel@...r.kernel.org, patches@...ts.linux.dev
Subject: Re: [PATCH v6 00/30] x86,fs/resctrl telemetry monitoring

On Thu, Jul 03, 2025 at 10:22:06AM -0700, Luck, Tony wrote:
> On Thu, Jul 03, 2025 at 09:45:15AM -0700, Reinette Chatre wrote:
> > Hi Tony and Dave,
> > 
> > On 6/26/25 9:49 AM, Tony Luck wrote:
> > >  --- 14 ---
> > > Add mon_evt::is_floating_point set by resctrl file system code to limit
> > > which events architecture code can request be displayed in floating point.
> > > 
> > > Simplified the fixed-point to floating point algorithm. Reinette is
> > > correct that the additional "lshift" and "rshift" operations are not
> > > required. All that is needed is to multiply the fixed point fractional
> > > part by 10**decimal_places, add a rounding amount equivalent to a "1"
> > > in the binary place after those supplied. Finally divide by 2**binary_places
> > > (with a right shift).
> > > 
> > > Explained in commit comment how I chose the number of decimal places to
> > > use for each binary places value.
> > > 
> > > N.B. Dave Martin expressed an opinion that the kernel should not do
> > > this conversion. Instead it should enumerate the scaling factor for
> > > each event where hardware reported a fixed point value. This patch
> > > could be dropped and replaced with one to enumerate scaling factors
> > > per event if others agree with Dave.
> > 
> > Could resctrl accommodate both usages? For example, it does not
> > look too invasive to add a second file <mon_evt::name>.raw for the
> > mon_evt::is_floating_point events that can output something like Dave
> > suggested in [1]:
> > 
> > .raw file format could be:
> > 	#format:<output that depends on format>
> > 	#fixed-point:<value>/<scaling factor>
> > 
> > Example output:
> > 	fixed-point:0x60000/0x40000
> 
> Dave: Is that what you want in the ".raw" file? An alternative would be
> to put the format information for non-integer events into an
> "info" file ("info/{RESOURCE_NAME}_MON/monfeatures.raw.formats"?)
> and just put the raw value into the ".raw" file under mon_data.

Note that I thought it easier for users to keep the raw file to just
showing a value, rather than including the formatting details in
Reinette's proposal.

Patch to implement my alternative suggestion below. To the user things
look like this:

$ cd /sys/fs/resctrl/mon_data/mon_PERF_PKG_01
$ cat core_energy
0.02203
$ cat core_energy.raw
5775
$ cat /sys/fs/resctrl/info/PERF_PKG_MON/mon_features_raw_scale
core_energy 262144
activity 262144
$ bc -ql
5775 / 262144
.02202987670898437500

If this seems useful I can write up a commit message and include
as its own patch in v7. Suggestions for better names?

-Tony

---

diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 4704ea7228ca..5ac4e3c98f23 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -90,6 +90,8 @@ extern struct mon_evt mon_event_all[QOS_NUM_EVENTS];
  *                   the event file belongs. When @sum is one this
  *                   is the id of the L3 cache that all domains to be
  *                   summed share.
+ * @raw:             Set for ".raw" files that directly show hardware
+ *                   provided counts with no interpretation.
  *
  * Pointed to by the kernfs kn->priv field of monitoring event files.
  * Readers and writers must hold rdtgroup_mutex.
@@ -100,6 +102,7 @@ struct mon_data {
 	struct mon_evt		*evt;
 	int			domid;
 	bool			sum;
+	bool			raw;
 };
 
 /**
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index 29de0e380ccc..78e7af296d5a 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -753,7 +753,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
 		seq_puts(m, "Error\n");
 	else if (rr.err == -EINVAL)
 		seq_puts(m, "Unavailable\n");
-	else if (evt->binary_bits == 0)
+	else if (md->raw || evt->binary_bits == 0)
 		seq_printf(m, "%llu\n", rr.val);
 	else
 		print_event_value(m, evt->binary_bits, rr.val);
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 511362a67532..97786831722a 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -1158,6 +1158,21 @@ static int rdt_mon_features_show(struct kernfs_open_file *of,
 	return 0;
 }
 
+static int rdt_mon_features_raw_scale_show(struct kernfs_open_file *of,
+					   struct seq_file *seq, void *v)
+{
+	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+	struct mon_evt *mevt;
+
+	for_each_mon_event(mevt) {
+		if (mevt->rid != r->rid || !mevt->enabled || !mevt->binary_bits)
+			continue;
+		seq_printf(seq, "%s %u\n", mevt->name, 1 << mevt->binary_bits);
+	}
+
+	return 0;
+}
+
 static int rdt_bw_gran_show(struct kernfs_open_file *of,
 			    struct seq_file *seq, void *v)
 {
@@ -1823,6 +1838,13 @@ static struct rftype res_common_files[] = {
 		.seq_show	= rdt_mon_features_show,
 		.fflags		= RFTYPE_MON_INFO,
 	},
+	{
+		.name		= "mon_features_raw_scale",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_mon_features_raw_scale_show,
+		.fflags		= RFTYPE_MON_INFO,
+	},
 	{
 		.name		= "num_rmids",
 		.mode		= 0444,
@@ -2905,7 +2927,7 @@ static void rmdir_all_sub(void)
  */
 static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
 					struct mon_evt *mevt,
-					bool do_sum)
+					bool do_sum, bool rawfile)
 {
 	struct mon_data *priv;
 
@@ -2916,7 +2938,8 @@ static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
 
 	list_for_each_entry(priv, &mon_data_kn_priv_list, list) {
 		if (priv->rid == rid && priv->domid == domid &&
-		    priv->sum == do_sum && priv->evt == mevt)
+		    priv->sum == do_sum && priv->evt == mevt &&
+		    priv->raw == rawfile)
 			return priv;
 	}
 
@@ -2928,6 +2951,7 @@ static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
 	priv->domid = domid;
 	priv->sum = do_sum;
 	priv->evt = mevt;
+	priv->raw = rawfile;
 	list_add_tail(&priv->list, &mon_data_kn_priv_list);
 
 	return priv;
@@ -3078,12 +3102,13 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_domain_hdr *hdr,
 	struct rmid_read rr = {0};
 	struct mon_data *priv;
 	struct mon_evt *mevt;
+	char rawname[64];
 	int ret;
 
 	for_each_mon_event(mevt) {
 		if (mevt->rid != r->rid || !mevt->enabled)
 			continue;
-		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
+		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum, false);
 		if (WARN_ON_ONCE(!priv))
 			return -EINVAL;
 
@@ -3093,6 +3118,18 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_domain_hdr *hdr,
 
 		if (r->rid == RDT_RESOURCE_L3 && !do_sum && resctrl_is_mbm_event(mevt->evtid))
 			mon_event_read(&rr, r, hdr, prgrp, &hdr->cpu_mask, mevt, true);
+
+		if (!mevt->binary_bits)
+			continue;
+
+		sprintf(rawname, "%s.raw", mevt->name);
+		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum, true);
+		if (WARN_ON_ONCE(!priv))
+			return -EINVAL;
+
+		ret = mon_addfile(kn, rawname, priv);
+		if (ret)
+			return ret;
 	}
 
 	return 0;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ