lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20090220171132.GK6960@linux.vnet.ibm.com>
Date:	Fri, 20 Feb 2009 09:11:32 -0800
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	Arnaldo Carvalho de Melo <acme@...hat.com>
Cc:	Ingo Molnar <mingo@...e.hu>, Steven Rostedt <rostedt@...dmis.org>,
	Frédéric Weisbecker <fweisbec@...il.com>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: Packable data structures found by pahole

On Wed, Feb 11, 2009 at 11:50:32AM -0200, Arnaldo Carvalho de Melo wrote:
> Em Wed, Feb 11, 2009 at 01:22:36PM +0100, Ingo Molnar escreveu:
> > Is there anything packable in core kernel structures like task struct?
> 
> I still haven't added a heuristic to avoid reporting members with
> explicit __alignment attributes, as these are not encoded in DWARF. I'll
> work on that soon, but till then we can use this as a starting point.
> 
> struct name, current size, --reorganized size, savings
> 
> $ pahole --packable ../build/blkftrace/vmlinux | sort -k4 -nr
> vc_data                      432   176 256
>    is this exported to userspace?
> 
> rcu_ctrlblk                  128    64  64
>    has ____cacheline_internodealigned_in_smp

For 900 bytes of memory-footprint reduction on uniprocessor builds,
I suggest rcutiny.c (http://lkml.org/lkml/2009/2/3/333).

That said, I don't know of any functional problems that would result
from packing rcu_ctrlblk.

						Thanx, Paul

> timex                        208   152  56
>    syscall interface
> 
> hh_cache                     128    72  56
>    has ____cacheline_aligned_in_smp
> 
> cpu_workqueue_struct         128    72  56
>    is ____cacheline_aligned
> 
> rchan_buf                    256   216  40
>    is ____cacheline_aligned
> 
> tty_struct                  1328  1296  32
>    this one doesn't have any annotation, looks ripe for --reorganize
> 
> task_struct                 6008  5976  32
> 
>    Printing this one here; the rest of the possibly packable data
> structures are listed after it:
> 
> struct task_struct {
> 	volatile long int          state;                /*     0     8 */
> 	void *                     stack;                /*     8     8 */
> 	atomic_t                   usage;                /*    16     4 */
> 	unsigned int               flags;                /*    20     4 */
> 	unsigned int               ptrace;               /*    24     4 */
> 	int                        lock_depth;           /*    28     4 */
> 	int                        prio;                 /*    32     4 */
> 	int                        static_prio;          /*    36     4 */
> 	int                        normal_prio;          /*    40     4 */
> 	unsigned int               rt_priority;          /*    44     4 */
> 	const struct sched_class  * sched_class;         /*    48     8 */
> 	struct sched_entity        se;                   /*    56   368 */
> 	/* --- cacheline 6 boundary (384 bytes) was 40 bytes ago --- */
> 	struct sched_rt_entity     rt;                   /*   424    64 */
> 	/* --- cacheline 7 boundary (448 bytes) was 40 bytes ago --- */
> 	unsigned char              fpu_counter;          /*   488     1 */
> 	s8                         oomkilladj;           /*   489     1 */
> 
> 	/* XXX 2 bytes hole, try to pack */
> 
> 	unsigned int               btrace_seq;           /*   492     4 */
> 	unsigned int               policy;               /*   496     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	cpumask_t                  cpus_allowed;         /*   504     8 */
> 	/* --- cacheline 8 boundary (512 bytes) --- */
> 	struct sched_info          sched_info;           /*   512    40 */
> 
> 	/* XXX last struct has 4 bytes of padding */
> 
> 	struct list_head           tasks;                /*   552    16 */
> 	struct plist_node          pushable_tasks;       /*   568    40 */
> 	/* --- cacheline 9 boundary (576 bytes) was 32 bytes ago --- */
> 	struct mm_struct *         mm;                   /*   608     8 */
> 	struct mm_struct *         active_mm;            /*   616     8 */
> 	struct linux_binfmt *      binfmt;               /*   624     8 */
> 	int                        exit_state;           /*   632     4 */
> 	int                        exit_code;            /*   636     4 */
> 	/* --- cacheline 10 boundary (640 bytes) --- */
> 	int                        exit_signal;          /*   640     4 */
> 	int                        pdeath_signal;        /*   644     4 */
> 	unsigned int               personality;          /*   648     4 */
> 	unsigned int               did_exec:1;           /*   652:31  4 */
> 
> 	/* XXX 31 bits hole, try to pack */
> 
> 	pid_t                      pid;                  /*   656     4 */
> 	pid_t                      tgid;                 /*   660     4 */
> 	long unsigned int          stack_canary;         /*   664     8 */
> 	struct task_struct *       real_parent;          /*   672     8 */
> 	struct task_struct *       parent;               /*   680     8 */
> 	struct list_head           children;             /*   688    16 */
> 	/* --- cacheline 11 boundary (704 bytes) --- */
> 	struct list_head           sibling;              /*   704    16 */
> 	struct task_struct *       group_leader;         /*   720     8 */
> 	struct list_head           ptraced;              /*   728    16 */
> 	struct list_head           ptrace_entry;         /*   744    16 */
> 	struct bts_tracer *        bts;                  /*   760     8 */
> 	/* --- cacheline 12 boundary (768 bytes) --- */
> 	void *                     bts_buffer;           /*   768     8 */
> 	size_t                     bts_size;             /*   776     8 */
> 	struct pid_link            pids[3];              /*   784    72 */
> 	/* --- cacheline 13 boundary (832 bytes) was 24 bytes ago --- */
> 	struct list_head           thread_group;         /*   856    16 */
> 	struct completion *        vfork_done;           /*   872     8 */
> 	int *                      set_child_tid;        /*   880     8 */
> 	int *                      clear_child_tid;      /*   888     8 */
> 	/* --- cacheline 14 boundary (896 bytes) --- */
> 	cputime_t                  utime;                /*   896     8 */
> 	cputime_t                  stime;                /*   904     8 */
> 	cputime_t                  utimescaled;          /*   912     8 */
> 	cputime_t                  stimescaled;          /*   920     8 */
> 	cputime_t                  gtime;                /*   928     8 */
> 	cputime_t                  prev_utime;           /*   936     8 */
> 	cputime_t                  prev_stime;           /*   944     8 */
> 	long unsigned int          nvcsw;                /*   952     8 */
> 	/* --- cacheline 15 boundary (960 bytes) --- */
> 	long unsigned int          nivcsw;               /*   960     8 */
> 	struct timespec            start_time;           /*   968    16 */
> 	struct timespec            real_start_time;      /*   984    16 */
> 	long unsigned int          min_flt;              /*  1000     8 */
> 	long unsigned int          maj_flt;              /*  1008     8 */
> 	struct task_cputime        cputime_expires;      /*  1016    24 */
> 	/* --- cacheline 16 boundary (1024 bytes) was 16 bytes ago --- */
> 	struct list_head           cpu_timers[3];        /*  1040    48 */
> 	/* --- cacheline 17 boundary (1088 bytes) --- */
> 	const struct cred  *       real_cred;            /*  1088     8 */
> 	const struct cred  *       cred;                 /*  1096     8 */
> 	struct mutex               cred_exec_mutex;      /*  1104    32 */
> 	char                       comm[16];             /*  1136    16 */
> 	/* --- cacheline 18 boundary (1152 bytes) --- */
> 	int                        link_count;           /*  1152     4 */
> 	int                        total_link_count;     /*  1156     4 */
> 	struct sysv_sem            sysvsem;              /*  1160     8 */
> 	long unsigned int          last_switch_count;    /*  1168     8 */
> 	struct thread_struct       thread;               /*  1176   208 */
> 
> 	/* XXX last struct has 4 bytes of padding */
> 
> 	/* --- cacheline 21 boundary (1344 bytes) was 40 bytes ago --- */
> 	struct fs_struct *         fs;                   /*  1384     8 */
> 	struct files_struct *      files;                /*  1392     8 */
> 	struct nsproxy *           nsproxy;              /*  1400     8 */
> 	/* --- cacheline 22 boundary (1408 bytes) --- */
> 	struct signal_struct *     signal;               /*  1408     8 */
> 	struct sighand_struct *    sighand;              /*  1416     8 */
> 	sigset_t                   blocked;              /*  1424     8 */
> 	sigset_t                   real_blocked;         /*  1432     8 */
> 	sigset_t                   saved_sigmask;        /*  1440     8 */
> 	struct sigpending          pending;              /*  1448    24 */
> 	/* --- cacheline 23 boundary (1472 bytes) --- */
> 	long unsigned int          sas_ss_sp;            /*  1472     8 */
> 	size_t                     sas_ss_size;          /*  1480     8 */
> 	int                        (*notifier)(void *);  /*  1488     8 */
> 	void *                     notifier_data;        /*  1496     8 */
> 	sigset_t *                 notifier_mask;        /*  1504     8 */
> 	struct audit_context *     audit_context;        /*  1512     8 */
> 	uid_t                      loginuid;             /*  1520     4 */
> 	unsigned int               sessionid;            /*  1524     4 */
> 	seccomp_t                  seccomp;              /*  1528     4 */
> 	u32                        parent_exec_id;       /*  1532     4 */
> 	/* --- cacheline 24 boundary (1536 bytes) --- */
> 	u32                        self_exec_id;         /*  1536     4 */
> 	spinlock_t                 alloc_lock;           /*  1540     4 */
> 	spinlock_t                 pi_lock;              /*  1544     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	struct plist_head          pi_waiters;           /*  1552    32 */
> 	struct rt_mutex_waiter *   pi_blocked_on;        /*  1584     8 */
> 	unsigned int               irq_events;           /*  1592     4 */
> 	int                        hardirqs_enabled;     /*  1596     4 */
> 	/* --- cacheline 25 boundary (1600 bytes) --- */
> 	long unsigned int          hardirq_enable_ip;    /*  1600     8 */
> 	unsigned int               hardirq_enable_event; /*  1608     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	long unsigned int          hardirq_disable_ip;   /*  1616     8 */
> 	unsigned int               hardirq_disable_event; /*  1624     4 */
> 	int                        softirqs_enabled;     /*  1628     4 */
> 	long unsigned int          softirq_disable_ip;   /*  1632     8 */
> 	unsigned int               softirq_disable_event; /*  1640     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	long unsigned int          softirq_enable_ip;    /*  1648     8 */
> 	unsigned int               softirq_enable_event; /*  1656     4 */
> 	int                        hardirq_context;      /*  1660     4 */
> 	/* --- cacheline 26 boundary (1664 bytes) --- */
> 	int                        softirq_context;      /*  1664     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	void *                     journal_info;         /*  1672     8 */
> 	struct bio *               bio_list;             /*  1680     8 */
> 	struct bio * *             bio_tail;             /*  1688     8 */
> 	struct reclaim_state *     reclaim_state;        /*  1696     8 */
> 	struct backing_dev_info *  backing_dev_info;     /*  1704     8 */
> 	struct io_context *        io_context;           /*  1712     8 */
> 	long unsigned int          ptrace_message;       /*  1720     8 */
> 	/* --- cacheline 27 boundary (1728 bytes) --- */
> 	siginfo_t *                last_siginfo;         /*  1728     8 */
> 	struct task_io_accounting  ioac;                 /*  1736    56 */
> 	/* --- cacheline 28 boundary (1792 bytes) --- */
> 	u64                        acct_rss_mem1;        /*  1792     8 */
> 	u64                        acct_vm_mem1;         /*  1800     8 */
> 	cputime_t                  acct_timexpd;         /*  1808     8 */
> 	nodemask_t                 mems_allowed;         /*  1816    64 */
> 	/* --- cacheline 29 boundary (1856 bytes) was 24 bytes ago --- */
> 	int                        cpuset_mems_generation; /*  1880     4 */
> 	int                        cpuset_mem_spread_rotor; /*  1884     4 */
> 	struct css_set *           cgroups;              /*  1888     8 */
> 	struct list_head           cg_list;              /*  1896    16 */
> 	struct robust_list_head *  robust_list;          /*  1912     8 */
> 	/* --- cacheline 30 boundary (1920 bytes) --- */
> 	struct compat_robust_list_head * compat_robust_list; /*  1920     8 */
> 	struct list_head           pi_state_list;        /*  1928    16 */
> 	struct futex_pi_state *    pi_state_cache;       /*  1944     8 */
> 	struct perf_counter_context perf_counter_ctx;    /*  1952    80 */
> 	/* --- cacheline 31 boundary (1984 bytes) was 48 bytes ago --- */
> 	struct mempolicy *         mempolicy;            /*  2032     8 */
> 	short int                  il_next;              /*  2040     2 */
> 
> 	/* XXX 2 bytes hole, try to pack */
> 
> 	atomic_t                   fs_excl;              /*  2044     4 */
> 	/* --- cacheline 32 boundary (2048 bytes) --- */
> 	struct rcu_head            rcu;                  /*  2048    16 */
> 	struct pipe_inode_info *   splice_pipe;          /*  2064     8 */
> 	struct task_delay_info *   delays;               /*  2072     8 */
> 	struct prop_local_single   dirties;              /*  2080    24 */
> 	int                        latency_record_count; /*  2104     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	/* --- cacheline 33 boundary (2112 bytes) --- */
> 	struct latency_record      latency_record[32];   /*  2112  3840 */
> 	/* --- cacheline 93 boundary (5952 bytes) --- */
> 	long unsigned int          timer_slack_ns;       /*  5952     8 */
> 	long unsigned int          default_timer_slack_ns; /*  5960     8 */
> 	struct list_head *         scm_work_list;        /*  5968     8 */
> 	int                        curr_ret_stack;       /*  5976     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	struct ftrace_ret_stack *  ret_stack;            /*  5984     8 */
> 	atomic_t                   trace_overrun;        /*  5992     4 */
> 	atomic_t                   tracing_graph_pause;  /*  5996     4 */
> 	long unsigned int          trace;                /*  6000     8 */
> 
> 	/* size: 6008, cachelines: 94, members: 148 */
> 	/* sum members: 5976, holes: 9, sum holes: 32 */
> 	/* bit holes: 1, sum bit holes: 31 bits */
> 	/* paddings: 2, sum paddings: 8 */
> 	/* last cacheline: 56 bytes */
> };	/* definitions: 742 */
> 
> If we ask pahole to reorganize it, it would perform these steps:
> 
> $ pahole -C task_struct --reorganize --show_reorg_steps kernel/sched.o|grep ^\/
> /* Demoting bitfield ('did_exec' ... 'did_exec') from 'unsigned int' to
>  * 'unsigned char' */
> 
> /* Moving bitfield('did_exec' ... 'did_exec') from after 'personality'
>  * to after 'oomkilladj' */
> 
> /* Moving 'personality' from after 'pdeath_signal' to after 'policy' */
> 
> /* Moving 'hardirq_enable_event' from after 'hardirq_enable_ip' to after
>  * 'pi_lock' */
> 
> /* Moving 'softirq_context' from after 'hardirq_context' to after
>  * 'softirq_disable_event' */
> 
> /* Moving 'curr_ret_stack' from after 'scm_work_list' to after
>  * 'latency_record_count' */
> 
> And the new stats would be:
> 
> 	/* size: 5976, cachelines: 94, members: 148 */
> 	/* sum members: 5973, holes: 2, sum holes: 3 */
> 	/* bit holes: 1, sum bit holes: 7 bits */
> 	/* paddings: 2, sum paddings: 8 */
> 	/* last cacheline: 24 bytes */
> };   /* saved 32 bytes! */
> 
> It would still have these holes/paddings:
> 
> 	<SNIP>
> 
> 	s8                    oomkilladj;           /*   489     1 */
> 	unsigned char         did_exec:1;           /*   490: 7  1 */
> 
> 	/* XXX 7 bits hole, try to pack */
> 	/* XXX 1 byte hole, try to pack */
> 
> 	unsigned int               btrace_seq;      /*   492     4 */
> 
> 	<SNIP>
> 
> 	/* --- cacheline 8 boundary (512 bytes) --- */
> 	struct sched_info     sched_info;           /*   512    40 */
> 
> 	/* XXX last struct has 4 bytes of padding */
> 
> 	struct list_head      tasks;                /*   552    16 */
> 
> 	<SNIP>
> 
> 	long unsigned int     last_switch_count;    /*  1160     8 */
> 	struct thread_struct  thread;               /*  1168   208 */
> 
> 	/* XXX last struct has 4 bytes of padding */
> 
> 	/* --- cacheline 21 boundary (1344 bytes) was 32 bytes ago --- */
> 
> 	<SNIP>
> 
> 	/* --- cacheline 31 boundary (1984 bytes) was 24 bytes ago --- */
> 	struct mempolicy *    mempolicy;            /*  2008     8 */
> 	short int             il_next;              /*  2016     2 */
> 
> 	/* XXX 2 bytes hole, try to pack */
> 
> 	atomic_t              fs_excl;              /*  2020     4 */
> 
> 	<SNIP>
> 
> I put the pahole vmlinux output on
> http://fedorapeople.org/~acme/pahole/vmlinux.pahole.c
> 
> zone                        1536  1512  24
> super_block                  768   744  24
> Scsi_Host                   1384  1360  24
> scsi_device                 1312  1288  24
> rq                          2456  2432  24
> request_queue               2272  2248  24
> net_device                  1600  1576  24
> cp_private                  1344  1320  24
> clocksource                  192   168  24
> ata_port                   11184 11160  24
> taskstats                    328   312  16
> sock                         544   528  16
> rtl8139_private              448   432  16
> rtentry                      120   104  16
> pci_dev                     1624  1608  16
> packet_sock                  760   744  16
> mtd_info                     352   336  16
> mousedev                     784   768  16
> module                       512   496  16
> mm_struct                    808   792  16
> loop_device                  400   384  16
> journal_s                    568   552  16
> gendisk                      720   704  16
> floppy_drive_params          128   112  16
> files_struct                 704   688  16
> dio                          856   840  16
> block_device                 248   232  16
> audit_context               1968  1952  16
> xfrm_state                   632   624   8
> writeback_control             64    56   8
> vt_spawn_console              24    16   8
> vmap_block_queue              48    40   8
> vfsmount                     224   216   8
> user_struct                   96    88   8
> unix_skb_parms                32    24   8
> unity_map_entry               48    40   8
> uart_port                    200   192   8
> tty_ldisc_ops                144   136   8
> tty_bufhead                  152   144   8
> tty_audit_buf                 72    64   8
> transaction_s                168   160   8
> tick_sched                   248   240   8
> thread_struct                208   200   8
> sysfs_dirent                  80    72   8
> sk_buff                      192   184   8
> signal_struct                944   936   8
> sighand_struct              2088  2080   8
> sg_io_hdr                     88    80   8
> serio                        704   696   8
> semid_ds                      88    80   8
> scsi_target                  616   608   8
> scsi_pointer                  64    56   8
> scm_cookie                    40    32   8
> rt_rq                       1760  1752   8
> rtc_device                   744   736   8
> root_domain                 1704  1696   8
> ring_buffer_per_cpu          112   104   8
> ring_buffer                   72    64   8
> request                      368   360   8
> rchan                        376   368   8
> psmouse_protocol              48    40   8
> proto                        336   328   8
> protection_domain             48    40   8
> prop_local_percpu             64    56   8
> proc_dir_entry               160   152   8
> power_supply                 112   104   8
> pnp_card                     632   624   8
> platform_device              520   512   8
> pid_namespace               2112  2104   8
> pglist_data                80576 80568   8
> perf_counter_context          80    72   8
> perf_counter                4408  4400   8
> pci_root_info                 40    32   8
> old_serial_port               40    32   8
> net                          592   584   8
> neigh_table                  472   464   8
> neighbour                    240   232   8
> ncp_mount_data_v4             80    72   8
> mtd_oob_ops                   64    56   8
> msghdr                        56    48   8
> mnt_namespace                 64    56   8
> ml_device                    888   880   8
> loop_info                    168   160   8
> kprobe                       128   120   8
> kparam_array                  48    40   8
> kmem_cache                  4352  4344   8
> irq_desc                     192   184   8
> ip_sf_list                    40    32   8
> ip_mc_list                   168   160   8
> ipc_namespace                296   288   8
> input_dev                   2352  2344   8
> inode                        560   552   8
> inet_timewait_death_row      568   560   8
> inet6_ifaddr                 184   176   8
> in_device                    376   368   8
> i387_soft_struct             136   128   8
> hrtimer_cpu_base             160   152   8
> hid_field                    112   104   8
> hid_device                  7144  7136   8
> gen_estimator                112   104   8
> fs_quota_stat                 80    72   8
> floppy_write_errors           40    32   8
> floppy_fdc_state              40    32   8
> flock                         32    24   8
> fb_info                      712   704   8
> ext3_sb_info                 440   432   8
> ext3_inode_info              768   760   8
> dquot                        232   224   8
> cpuinfo_x86                  192   184   8
> clock_event_device           128   120   8
> cdrom_generic_command         64    56   8
> cache_detail                 224   216   8
> bsg_device                   160   152   8
> bsg_class_device              48    40   8
> blk_user_trace_setup          72    64   8
> blk_trace                     96    88   8
> blkcipher_walk               112   104   8
> audit_watch                   72    64   8
> atkbd                       1488  1480   8
> ata_queued_cmd               224   216   8
> ata_host                      72    64   8
> ata_device                  1168  1160   8
> as_io_context                104    96   8
> amd_iommu                    120   112   8
> agp_kern_info                 80    72   8
> agp_bridge_data              200   192   8
> acpi_thermal                1472  1464   8
> acpi_pscope_state             56    48   8
> acpi_prt_entry                48    40   8
> acpi_processor_power        2112  2104   8
> acpi_processor_performance   112   104   8
> acpi_processor_cx            136   128   8
> acpi_blacklist_item           56    48   8
> tty_port                     136   132   4
> scsi_host_cmd_pool            48    44   4
> rtentry32                     84    80   4
> msqid_ds                     104   100   4
> inotify_watch                 64    60   4
> in6_rtmsg                     80    76   4
> fown_struct                   32    28   4
> fib_iter_state                56    52   4
> entropy_store                 56    52   4
> compat_ncp_mount_data         56    52   4
> compat_loop_info             140   136   4
> compat_floppy_fdc_state       32    28   4
> compat_floppy_drive_params    88    84   4
> agp_allocate                  24    20   4
> acpi_parse_obj_named          72    68   4
> fb_monspecs                  144   141   3
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ