#!/usr/bin/env python import sys, re, getopt class Recorder: regex_data = re.compile("s_dev=(.*) i_ino=(.*) offset=(.*)") regex_data_for_find = re.compile("s_dev=(.*) i_ino=(.*) offset=(.*) (.*)") def __init__(self): self.__processes = {} self.__files = {} self.__records = {} (self.__tprocess, self.__tdev, self.__tinode) = (None, None, None) def get_files(self): return self.__files.values() def get_processes(self): return self.__processes.values() def get_process(self, name): if not name in self.__processes: self.__processes[name] = Process(name) return self.__processes[name] def set_target(self, process, dev, inode): (self.__tprocess, self.__tdev, self.__tinode ) = (process, dev, inode) def get_file(self, dev, inode): key = (dev, inode) if not key in self.__files: self.__files[key] = File(dev, inode) return self.__files[key] def get_records(self): return self.__records def get_record(self, process, dev, inode): if self.__tprocess and self.__tprocess != process or \ self.__tdev and self.__tdev != dev or \ self.__tinode and self.__tinode != inode: return None p = self.get_process(process) f = self.get_file(dev, inode) key = (p, f) if not key in self.__records: self.__records[key] = Record(p, f) return self.__records[key] def record_find_get_page(self, process, data): m = Recorder.regex_data_for_find.match(data) if not m: print "find_get_page() illegal format:%s" % data (dev, inode, index, flag) = m.groups() if flag == "page_found": r = self.get_record(process, dev, inode) if r != None: r.access_to_cached_page(index) r.get_file().add_page(index) def record_add_to_page_cache(self, process, data): m = Recorder.regex_data.match(data) (dev, inode, index) = m.groups() r = self.get_record(process, dev, inode) if r != None: r.add_to_page_cache(index) r.get_file().add_page(index) def record_remove_from_page_cache(self, process, data): m = Recorder.regex_data.match(data) (dev, inode, index) = m.groups() r = self.get_record(process, dev, inode) if r != None: r.remove_from_page_cache(index) r.get_file().remove_page(index) class Record: def __init__(self, process, file): self.__process = process self.__process.add_record(self) self.__file = file self.__file.add_record(self) self.__added_page_list = [] self.__removed_page_list = [] self.__indirect_removed_page_list = [] self.__cached_page_list = [] def is_added(self, index): if index in self.__added_page_list and index in self.__cached_page_list: return True else: return False def get_file(self): return self.__file def count_cached_page(self): return len(self.__cached_page_list) def count_added_page(self): return len(self.__added_page_list) def count_removed_page(self): return len(self.__removed_page_list) def count_indirect_removed_page(self): return len(self.__indirect_removed_page_list) def access_to_cached_page(self, index): self.__cached_page_list.append(index) def add_to_page_cache(self, index): self.__added_page_list.append(index) def remove_from_page_cache(self, index): if self.is_added(index): self.__removed_page_list.append(index) self.__cached_page_list.remove(index) else: self.__indirect_removed_page_list.append(index) def __str__(self): return "p=%s,f=%s,cached:%d,added:%d,removed:%d,indirect_removed:%d" % \ (self.__process.name, self.__file.dev + "," + self.__file.inode, \ self.count_cached_page(), self.count_added_page(), \ self.count_removed_page(), self.count_indirect_removed_page()) class File: def __init__(self, dev, inode): self.__dev = dev self.__inode = inode self.__cached_page_list = [] self.__records = [] self.__reliable = True def get_dev(self): return self.__dev def get_inode(self): return self.__inode def add_record(self, record): self.__records.append(record) def remove_page(self, index): if index in self.__cached_page_list: self.__cached_page_list.remove(index) else: self.__reliable = False def add_page(self, index): if not index in self.__cached_page_list: self.__cached_page_list.append(index) def count_cached_page(self): return len(self.__cached_page_list) class Process: def __init__(self, name): self.__name = name self.__records = [] def get_name(self): return self.__name def add_record(self, record): self.__records.append(record) def get_records(self): return self.__records def sum_cached_page(self): return reduce(lambda s,r : s+r.count_cached_page(), self.__records, 0) def sum_added_page(self): return reduce(lambda s,r : s+r.count_added_page(), self.__records, 0) def sum_removed_page(self): return reduce(lambda s,r : s+r.count_removed_page(), self.__records, 0) def sum_indirect_removed_page(self): return reduce(lambda s,r : s+r.count_indirect_removed_page(), \ self.__records, 0) def convert_to_size(num): PAGE_SIZE=4 units = ['K', 'M', 'G', 'T'] s = num * PAGE_SIZE for u in units: s, m = divmod(s, 1000) if s == 0: return str(m) + u return str(num * PAGE_SIZE) + "K" def process_events(recorder, source): regex_event = \ re.compile("\s*([\w\d-]*)\s*\[\d*\]\s*([0-9.]*):\s*(\w*):\s*(.*)") for line in source: m = regex_event.match(line) if not m: continue (process, time, event, data) = m.groups() if event == "find_get_page": recorder.record_find_get_page(process, data) elif event == "add_to_page_cache": recorder.record_add_to_page_cache(process, data) elif event == "remove_from_page_cache" : recorder.record_remove_from_page_cache(process, data) else: print "Invalid event: %s" % line sys.exit(-1) def show_statistics(recorder): print "[file list]" for f in recorder.get_files(): print " dev:%s, inode:%s, cached: %s" % (f.get_dev(), f.get_inode(), \ convert_to_size(f.count_cached_page())) print "[process list]" for p in recorder.get_processes(): print " process: %s" % p.get_name(), print "(cached: %s, added: %s, removed: %s, indirect removed: %s)" % \ tuple(map(convert_to_size, [p.sum_cached_page(), p.sum_added_page(), p.sum_removed_page(), p.sum_indirect_removed_page()])) for r in p.get_records(): f = r.get_file() print " dev:%s, inode:%s," % (f.get_dev(), f.get_inode()), print "cached:%s, added:%s, removed:%s, indirect removed:%s" % \ tuple(map(convert_to_size, [r.count_cached_page(), r.count_added_page(), r.count_removed_page(), r.count_indirect_removed_page()])) def usage(cmd): print "%s [options]... [file]" % cmd print " -p, --process=name filter by process name" print " -i, --inode=inode filter by inode" print " -d, --dev=device filter by device" print " -h, --help print this help" def main(argv): try: opts, args = getopt.getopt(argv[1:], "hp:i:d:", \ ["help", "process=", "inode=", "dev="]) except getopt.GetoptError: usage(sys.argv[0]) sys.exit(-1) (process, dev, inode) = (None, None, None) for o, a in opts: if o in ("-h", "--help"): usage(argv[0]) sys.exit() if o in ("-p", "--process"): process = a if o in ("-d", "--dev"): dev = a if o in ("-i", "--inode"): inode = a recorder = Recorder() recorder.set_target(process, dev, inode) process_events(recorder, sys.stdin) show_statistics(recorder) if __name__ == "__main__" : main(sys.argv)