#!/usr/bin/ruby
# Quick script to estimate how much space is wasted in an _i386_ binary due
# to duplicate functions.
#
# Mitchell Blank Jr 20060104
#
# Note: since we parse the output of "objdump", it might fail miserably if
# your binutils don't match mine.  This is tested on objdump version
# "2.17.50.0.3-6 20060715" as provided by Fedora Core 6.

require 'digest/md5'
HASHFUNC = Digest::MD5
# require 'digest/sha1'
# HASHFUNC = Digest::SHA1

# "xxxxxxxx <name>:" -- start of a new function; capture 1 is the name.
FUNC_HEADER = /^[[:xdigit:]]{8} <(\S+)>:$/
# "xxxxxxxx:  aa bb cc ...  mnemonic" -- capture 1 is the run of hex bytes.
OPCODE_LINE = /^[[:xdigit:]]{8}:\s*((\s[[:xdigit:]]{2})+)\s.*/
# Trailing "xxxxxxxx <symbol>" or "xxxxxxxx <symbol+0xNN>" operand; capture 1
# is the absolute address, capture 2 the symbol.  The offset may have any
# number of hex digits.
SYMBOL_REF = /.*\s([[:xdigit:]]{8})\s+<(\w+)(\+0x[[:xdigit:]]+)?>$/

# One disassembled function: its name, its size in bytes, and (once finished)
# a digest of its opcode bytes.
class Func
  attr_reader :name, :bytes, :digest

  # name: the symbol name taken from the objdump function header.
  def initialize(name)
    @name = name
    @bytes = 0
    # Per-instance digest state, so several Funcs can be built independently.
    @accumulator = HASHFUNC.new
    @digest = nil
  end

  # Add an array of strings (each holding one hex byte, e.g. "e8") to the
  # function.  Returns self.
  def <<(hexarr)
    raw = [hexarr.join].pack('H*')
    @bytes += raw.length
    @accumulator << raw
    self
  end

  # Freeze the digest of everything added so far and release the digest
  # state.  Returns self so the result can be appended to a list directly.
  def finish
    @digest = @accumulator.digest
    @accumulator = nil
    self
  end
end

# Evil hack -- if the opcode on `line` references a location outside the
# function named `current_name`, return a copy of `hex` whose last four bytes
# are replaced by the absolute target address (little-endian).  Otherwise
# return `hex` unchanged.
#
# For comparison purposes we want to pretend the opcode was referencing the
# absolute target address.  Otherwise, two identical functions that both do
# "call otherfunc" will appear to be different because the call opcode will
# have different relative addresses.
#
# It's also possible to get false positives without this transformation: in
# the kernel sys_send() is a simple wrapper around sys_sendto() and
# sys_recv() is an identical wrapper around sys_recvfrom().  In my kernel the
# relative difference between the function addresses is the same, so without
# this hack it thinks that sys_send() and sys_recv() are identical!
def absolutize_reference(hex, line, current_name)
  return hex if hex.length < 5

  ref = SYMBOL_REF.match(line)
  return hex unless ref && ref[2] != current_name

  hex[0...-4] + ref[1].scan(/[[:xdigit:]]{2}/).reverse
end

# Read "objdump --disassemble" output from `io` and return an array of
# finished Func objects, in the order the functions appear.  Aborts the
# program on any line it cannot parse or on a non-i386 file format.
def parse_disassembly(io)
  funcs = []
  current = nil
  io.each_line do |raw_line|
    line = raw_line.chomp
    case line
    when FUNC_HEADER
      # Start of a new function
      funcs << current.finish if current
      current = Func.new(Regexp.last_match(1))
    when OPCODE_LINE
      # An opcode -- add its bytes to the current function
      hex = Regexp.last_match(1).split
      abort "Opcode outside of any function: \"#{line}\"\n" unless current
      current << absolutize_reference(hex, line, current.name)
    when /.*file format (.*)$/
      format = Regexp.last_match(1)
      abort "can't handle #{format} file format" unless format == "elf32-i386"
    when "", /^Disassembly of .*/
      # do nothing
    else
      abort "Can't parse \"#{line}\"!\n"
    end
  end
  funcs << current.finish if current
  funcs
end

# Print every group of functions whose opcode digests match, followed by a
# summary of how many bytes could be saved by keeping one copy of each.
# `out` is anything that responds to `print` (defaults to standard output).
def report_duplicates(funcs, out = $stdout)
  out.print "Duplicated functions:\n"
  total_bytes = funcs.inject(0) { |sum, func| sum + func.bytes }
  # group_by keeps first-seen order, so groups print in input order.
  groups = funcs.group_by { |func| func.digest }.values.reject { |group| group.length < 2 }

  saved = 0
  instances = 0
  groups.each do |group|
    size = group.first.bytes
    names = group.map { |func| func.name }
    out.print " #{size} bytes: #{names.join(', ')}\n"
    instances += group.length - 1
    saved += (group.length - 1) * size
  end

  out.print "\nFound #{instances} instances of #{groups.length} duplicated functions\n"
  out.print "Possible savings: #{saved} bytes (of #{total_bytes} total)\n"
end

# Script entry point: parse objdump output from standard input and report.
def main
  abort "Usage: objdump --disassemble vmlinux | #{$0}" if STDIN.tty?
  report_duplicates(parse_disassembly(STDIN))
end

main