Message-ID: <xpbwa63z7w6fa7mykmhhd56vdm3746y2x7vvtp5xj2st5aupvz@4rrj7gxzu66c>
Date:   Thu, 7 Dec 2023 11:53:07 +0100
From:   Daniel Wagner <dwagner@...e.de>
To:     Christoph Hellwig <hch@....de>
Cc:     linux-nvme@...ts.infradead.org, linux-kernel@...r.kernel.org,
        Keith Busch <kbusch@...nel.org>,
        Sagi Grimberg <sagi@...mberg.me>,
        Hannes Reinecke <hare@...e.de>
Subject: Re: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head

On Wed, Dec 06, 2023 at 09:54:36AM +0100, Christoph Hellwig wrote:
> On Wed, Dec 06, 2023 at 09:12:43AM +0100, Daniel Wagner wrote:
> > Move the namespace info to struct nvme_ns_head, because it's the same
> > for all associated namespaces.
> > 
> > The head pointer is accessible from the ns pointer so we could just
> > update all places with ns->x to ns->head->x. While this is okay for the
> > slow path,
> 
> Do you have any data to show that it matters?  All the I/O command
> setup functions already access the ns_head for ->ns_id, so looking
> at more fields can't really make any difference.

I've split the patch so that the first patch just moves the variables
around and changes ns->x to ns->head->x ('patched'). Then I changed the
layout of nvme_ns_head so that all variables used in nvme_setup_rw()
are in one cacheline ('cache line optimized'), and the last change
passes the nvme_ns_head pointer around ('use nvme_ns_head directly').
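For illustration, here is a simplified userspace sketch of what the
variants boil down to (hypothetical stand-in structs and function names,
not the actual kernel code):

#include <stdint.h>

/* Hypothetical, simplified stand-ins for the kernel structs; only one
 * field relevant to the read path is shown. */
struct ns_head {
	int lba_shift;
	/* ... other per-head fields ... */
};

struct ns {
	struct ns_head *head;
	/* before the move, lba_shift and friends lived here */
};

/* 'patched': every former ns->x access becomes ns->head->x */
static uint64_t sect_to_lba_patched(struct ns *ns, uint64_t sector)
{
	return sector >> (ns->head->lba_shift - 9);
}

/* 'use nvme_ns_head directly': the setup path passes the head pointer
 * around, saving the extra ns->head dereference per access */
static uint64_t sect_to_lba_direct(struct ns_head *head, uint64_t sector)
{
	return sector >> (head->lba_shift - 9);
}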

I assume nvme_setup_rw() is the most frequently used of these functions,
so I tried to benchmark it by issuing 4k reads. I'm sure my benchmark
setup is not perfect, but that's what I have.

Anyway, the results indicate that moving the variables to nvme_ns_head
has a slight performance cost, but it can be more than mitigated by
optimizing the cacheline accesses. The change to use nvme_ns_head
directly seems to eat up all the cacheline optimization gains again.

'patched' layout:

struct nvme_ns_head {
	struct list_head           list;                 /*     0    16 */
	struct srcu_struct         srcu;                 /*    16    72 */
	/* --- cacheline 1 boundary (64 bytes) was 24 bytes ago --- */
	struct nvme_subsystem *    subsys;               /*    88     8 */
	unsigned int               ns_id;                /*    96     4 */
	struct nvme_ns_ids         ids;                  /*   100    41 */

	/* XXX 3 bytes hole, try to pack */

	/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
	struct list_head           entry;                /*   144    16 */
	struct kref                ref;                  /*   160     4 */
	bool                       shared;               /*   164     1 */

	/* XXX 3 bytes hole, try to pack */

	int                        instance;             /*   168     4 */

	/* XXX 4 bytes hole, try to pack */

	struct nvme_effects_log *  effects;              /*   176     8 */
	int                        lba_shift;            /*   184     4 */
	u16                        ms;                   /*   188     2 */
	u16                        pi_size;              /*   190     2 */
	/* --- cacheline 3 boundary (192 bytes) --- */
	u16                        sgs;                  /*   192     2 */

	/* XXX 2 bytes hole, try to pack */

	u32                        sws;                  /*   196     4 */
	u64                        nuse;                 /*   200     8 */
	u8                         pi_type;              /*   208     1 */
	u8                         guard_type;           /*   209     1 */

	/* XXX 6 bytes hole, try to pack */

	u64                        zsze;                 /*   216     8 */
	unsigned long              features;             /*   224     8 */
	struct ratelimit_state     rs_nuse;              /*   232   104 */
	/* --- cacheline 5 boundary (320 bytes) was 16 bytes ago --- */

[...]
}


'cacheline optimized' layout:

struct nvme_ns_head {
	struct list_head           list;                 /*     0    16 */
	struct srcu_struct         srcu;                 /*    16    72 */
	/* --- cacheline 1 boundary (64 bytes) was 24 bytes ago --- */
	struct nvme_subsystem *    subsys;               /*    88     8 */
	struct nvme_ns_ids         ids;                  /*    96    41 */

	/* XXX 7 bytes hole, try to pack */

	/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
	struct list_head           entry;                /*   144    16 */
	struct kref                ref;                  /*   160     4 */
	bool                       shared;               /*   164     1 */

	/* XXX 3 bytes hole, try to pack */

	int                        instance;             /*   168     4 */

	/* XXX 4 bytes hole, try to pack */

	struct nvme_effects_log *  effects;              /*   176     8 */
	u64                        nuse;                 /*   184     8 */
	/* --- cacheline 3 boundary (192 bytes) --- */
	unsigned int               ns_id;                /*   192     4 */
	int                        lba_shift;            /*   196     4 */
	u16                        ms;                   /*   200     2 */
	u16                        pi_size;              /*   202     2 */
	u8                         pi_type;              /*   204     1 */
	u8                         guard_type;           /*   205     1 */
	u16                        sgs;                  /*   206     2 */
	u32                        sws;                  /*   208     4 */
[...]
}
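As a sanity check that such a reordering really keeps the hot fields
together, one could add a build-time assertion along these lines (a
sketch against a padded stand-in struct that mimics the offsets above,
not part of the actual patch):

#include <assert.h>   /* static_assert (C11) */
#include <stddef.h>   /* offsetof */

#define CACHELINE_BYTES 64

/* Padded stand-in reproducing the offsets pahole reported above;
 * hypothetical, for illustration only. */
struct head_layout {
	char            pad[192];    /* everything before the hot fields */
	unsigned int    ns_id;       /* 192 */
	int             lba_shift;   /* 196 */
	unsigned short  ms;          /* 200 */
	unsigned short  pi_size;     /* 202 */
	unsigned char   pi_type;     /* 204 */
	unsigned char   guard_type;  /* 205 */
	unsigned short  sgs;         /* 206 */
	unsigned int    sws;         /* 208 */
};

/* The first and the last hot field must fall into the same cacheline. */
static_assert(offsetof(struct head_layout, ns_id) / CACHELINE_BYTES ==
	      (offsetof(struct head_layout, sws) +
	       sizeof(unsigned int) - 1) / CACHELINE_BYTES,
	      "hot fields span more than one cacheline");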

fio test job:
  [global]
  name=nvme-read
  time_based
  ramp_time=30
  runtime=120
  readwrite=read
  bs=4k
  ioengine=io_uring
  direct=1
  numjobs=4
  iodepth=64
  group_reporting=1
  cpus_allowed=1-4
  cpus_allowed_policy=split

  [nvme0]
  new_group
  filename=/dev/nvme0n1

bandwidth
      'baseline'  'patched'  'cache line optimized' 'use nvme_ns_head directly'
      1608        1632       1613                   1618
      1608        1610       1634                   1618
      1623        1639       1642                   1646
      1638        1610       1640                   1619
      1637        1611       1642                   1620
avg   1622.8      1620.4     1634.2                 1624.2
stdev 14.75       14.01      12.29                  12.21

ios
      'baseline'  'patched'  'cache line optimized' 'use nvme_ns_head directly'
      65626946    66735998   66268893               66458877
      65641469    66041634   66888910               66384526
      66012335    66904002   67132768               67329550
      66589757    66013222   67132053               66491121
      66569213    66033040   67132075               66474708
avg   66087944    66345579.2 66910939.8             66627756.4
stdev 474608.24   437260.67  374068.50              394426.34
