diff options
author | 2011-05-19 15:04:43 +0200 | |
---|---|---|
committer | 2011-05-19 15:04:43 +0200 | |
commit | 0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60 (patch) | |
tree | cbe01c33670949b01134907bd37492df2051abfd /pypy/tool | |
parent | fix for the case in which you don't pass the cpython vmrss (diff) | |
download | pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.tar.gz pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.tar.bz2 pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.zip |
move this tool to its own directory; it does not really belong to the JIT
Diffstat (limited to 'pypy/tool')
-rw-r--r-- | pypy/tool/memusage/__init__.py | 0 | ||||
-rw-r--r-- | pypy/tool/memusage/log-template.gnumeric | bin | 0 -> 1176364 bytes | |||
-rw-r--r-- | pypy/tool/memusage/log2gnumeric.py | 180 | ||||
-rw-r--r-- | pypy/tool/memusage/test/test_log2gnumeric.py | 99 |
4 files changed, 279 insertions, 0 deletions
# Scraped diff headers for the files added by this commit, kept for provenance:
# diff --git a/pypy/tool/memusage/__init__.py b/pypy/tool/memusage/__init__.py
#   new file mode 100644, index 0000000000..e69de29bb2 (--- /dev/null, empty file)
# diff --git a/pypy/tool/memusage/log-template.gnumeric b/pypy/tool/memusage/log-template.gnumeric
#   Binary files differ; new file mode 100644, index 0000000000..b9fc1685fc (--- /dev/null)
# diff --git a/pypy/tool/memusage/log2gnumeric.py b/pypy/tool/memusage/log2gnumeric.py
#   new file mode 100644, index 0000000000..d8bbd72184 (--- /dev/null), @@ -0,0 +1,180 @@
#! /usr/bin/env python
"""
Usage: log2gnumeric logfile

Produces a logfile.gnumeric file which contains the data extracted from the
logfile generated with the PYPYLOG env variable.

Currently, it expects log to contain the translation-task and gc-collect
categories.

You can freely edit the graph in log-template.gnumeric: this script will
create a new file replacing the 'translation-task' and 'gc-collect' sheets.
"""

import re
import sys
import gzip
import optparse
from xml.sax.saxutils import escape

try:
    _NUMERIC_TYPES = (int, long, float)
except NameError:
    # Python 3 has no separate ``long`` type.
    _NUMERIC_TYPES = (int, float)


def main(logname, options):
    """Convert the log file *logname* into ``logname + '.gnumeric'``.

    The sheets 'translation-task', 'gc-collect', 'loops', 'vmrss' and
    'cpython-vmrss' of the template are replaced with data extracted from
    the log, from ``logname + '.vmrss'`` and from ``options.cpython_vmrss``.

    logname -- path of the log file to read
    options -- object with a ``cpython_vmrss`` attribute (a path or None)
    """
    # The original re-read sys.argv[1] here, silently ignoring the argument.
    outname = logname + '.gnumeric'
    logfile = open(logname)
    try:
        data = logfile.read()
    finally:
        logfile.close()
    # the extraction regexps are written against a log without newlines
    data = data.replace('\n', '')
    minclock, maxclock = get_clock_range(data)
    time0 = minclock # we want "relative clocks"
    maxtime = maxclock-time0
    #
    # NOTE(review): the template is looked up relative to the current working
    # directory, not relative to this script -- confirm this is intended.
    template = gzip.open('log-template.gnumeric')
    try:
        xml = template.read()
    finally:
        template.close()
    xml = replace_sheet(xml, 'translation-task', tasks_rows(time0, data))
    xml = replace_sheet(xml, 'gc-collect', gc_collect_rows(time0, data))
    xml = replace_sheet(xml, 'loops', loops_rows(time0, data))
    xml = replace_sheet(xml, 'vmrss', vmrss_rows(logname + '.vmrss', maxtime))
    xml = replace_sheet(xml, 'cpython-vmrss', vmrss_rows(options.cpython_vmrss, maxtime))
    #
    out = gzip.open(outname, 'wb')
    try:
        out.write(xml)
    finally:
        out.close()


# ========================================================================
# functions to manipulate gnumeric files
# ========================================================================

def replace_sheet(xml, sheet_name, data):
    """Return *xml* with the cells of the sheet called *sheet_name* replaced.

    xml        -- the uncompressed gnumeric document, as a string
    sheet_name -- literal sheet name (it is regex-escaped before matching)
    data       -- iterable of rows, as accepted by gen_cells()

    If no matching sheet is found a message is printed and *xml* is
    returned unchanged.
    """
    pattern = '<gnm:Sheet .*?<gnm:Name>%s</gnm:Name>.*?(<gnm:Cells>.*?</gnm:Cells>)'
    # escape the name so that characters such as '.' or '+' match literally
    regex = re.compile(pattern % re.escape(sheet_name), re.DOTALL)
    cells = gen_cells(data)
    match = regex.search(xml)
    if not match:
        print('Cannot find sheet %s' % sheet_name)
        return xml
    # splice the new <gnm:Cells> element over the old one (group 1)
    a, b = match.span(1)
    return xml[:a] + cells + xml[b:]

def gen_cells(data):
    """Render *data* (an iterable of rows of values) as a <gnm:Cells> element.

    None becomes an empty cell, int/long/float values become number cells
    and everything else becomes a string cell whose text is XML-escaped.
    """
    # values for the ValueType attribute
    ValueType_Empty  = 'ValueType="10"'
    ValueType_Number = 'ValueType="40"'
    ValueType_String = 'ValueType="60"'
    #
    cell = ' <gnm:Cell Row="%d" Col="%d" %s>%s</gnm:Cell>'
    parts = ['<gnm:Cells>']
    for i, row in enumerate(data):
        for j, val in enumerate(row):
            if val is None:
                attr = ValueType_Empty
                val = ''
            elif isinstance(val, _NUMERIC_TYPES):
                attr = ValueType_Number
            else:
                attr = ValueType_String
                # '<', '>' and '&' would otherwise corrupt the document
                val = escape('%s' % (val,))
            parts.append(cell % (i, j, attr, val))
    parts.append(' </gnm:Cells>')
    return '\n'.join(parts)
+ +# ======================================================================== +# functions to extract various data from the logs +# ======================================================================== + +CLOCK_FACTOR = 1 +def read_clock(x): + timestamp = int(x, 16) + return timestamp / CLOCK_FACTOR + +def get_clock_range(data): + s = r"\[([0-9a-f]+)\] " + r = re.compile(s) + clocks = [read_clock(x) for x in r.findall(data)] + return min(clocks), max(clocks) + +def gc_collect_rows(time0, data): + s = r""" +----------- Full collection ------------------ +\| used before collection: +\| in ArenaCollection: (\d+) bytes +\| raw_malloced: (\d+) bytes +\| used after collection: +\| in ArenaCollection: (\d+) bytes +\| raw_malloced: (\d+) bytes +\| number of major collects: (\d+) +`---------------------------------------------- +\[([0-9a-f]+)\] gc-collect\}""" + # + r = re.compile(s.replace('\n', '')) + yield 'clock', 'gc-before', 'gc-after' + for a,b,c,d,e,f in r.findall(data): + clock = read_clock(f) - time0 + yield clock, int(a)+int(b), int(c)+int(d) + +def tasks_rows(time0, data): + s = r""" +\[([0-9a-f]+)\] \{translation-task +starting ([\w-]+) +""" + # + r = re.compile(s.replace('\n', '')) + yield 'clock', None, 'task' + for a,b in r.findall(data): + clock = read_clock(a) - time0 + yield clock, 1, b + + +def loops_rows(time0, data): + s = r""" +\[([0-9a-f]+)\] \{jit-mem-looptoken-(alloc|free) +(.*?)\[ +""" + # + r = re.compile(s.replace('\n', '')) + yield 'clock', 'total', 'loops', 'bridges' + loops = 0 + bridges = 0 + fake_total = 0 + for clock, action, text in r.findall(data): + clock = read_clock(clock) - time0 + if text.startswith('allocating Loop #'): + loops += 1 + elif text.startswith('allocating Bridge #'): + bridges += 1 + elif text.startswith('freeing Loop #'): + match = re.match('freeing Loop # .* with ([0-9]*) attached bridges', text) + loops -=1 + bridges -= int(match.group(1)) + total = loops+bridges + yield clock, loops+bridges, loops, bridges + + 
def vmrss_rows(filename, maxtime):
    """Yield the rows of a VmRSS sheet read from the file *filename*.

    If *filename* is empty or None, or the file cannot be opened, only the
    header row is produced (a warning is printed in the latter case).
    *maxtime* is passed through to vmrss_rows_impl().
    """
    lines = []
    if filename:
        try:
            f = open(filename)
            try:
                lines = f.readlines()
            finally:
                f.close()
        except IOError:
            # the original message never interpolated the file name
            print('Warning: cannot find file %s, skipping this sheet' % filename)
    for row in vmrss_rows_impl(lines, maxtime):
        yield row

def vmrss_rows_impl(lines, maxtime):
    """Yield a header row, then (inferred clock, memory) for every sample.

    lines   -- sequence of strings, one integer sample per line; blank
               lines are ignored
    maxtime -- clock assigned to the last sample; the samples are spread
               evenly between 0 and *maxtime*
    """
    yield 'inferred clock', 'VmRSS'
    # a trailing blank line must not reach int() nor skew the spacing
    samples = [line for line in lines if line.strip()]
    numlines = len(samples)
    for i, line in enumerate(samples):
        mem = int(line)
        if numlines > 1:
            clock = maxtime * i // (numlines-1)
        else:
            # a single sample has no interval to spread over: put it at 0
            clock = 0
        yield clock, mem


if __name__ == '__main__':
    CLOCK_FACTOR = 1000000000.0 # report GigaTicks instead of Ticks
    parser = optparse.OptionParser(usage="%prog logfile [options]")
    parser.add_option('-c', '--cpython-vmrss', dest='cpython_vmrss', default=None, metavar='FILE', type=str,
                      help='the .vmrss file produced by CPython')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(2)
    main(args[0], options)

# Remainder of this scraped line (header and first part of the next file in the diff), kept verbatim:
# diff --git a/pypy/tool/memusage/test/test_log2gnumeric.py b/pypy/tool/memusage/test/test_log2gnumeric.py new file mode 100644 index 0000000000..42beea8164 --- /dev/null +++ b/pypy/tool/memusage/test/test_log2gnumeric.py @@ -0,0 +1,99 @@ +from pypy.tool.memusage import log2gnumeric + +log = """ +[1000] ... +[2000] {gc-collect + +.----------- Full collection ------------------ +| used before collection: +| in ArenaCollection: 500 bytes +| raw_malloced: 100 bytes +| used after collection: +| in ArenaCollection: 300 bytes +| raw_malloced: 50 bytes +| number of major collects: 1 +`---------------------------------------------- +[3000] gc-collect} +[4000] {gc-collect + +.----------- Full collection ------------------ +| used before collection: +| in ArenaCollection: 600 bytes +| raw_malloced: 200 bytes +| used after collection: +| in ArenaCollection: 400 bytes +| raw_malloced: 100 bytes +| number of major collects: 1 +`---------------------------------------------- +[5000] gc-collect} +... +... +[6000] {translation-task +starting annotate +... +...
# pypy/tool/memusage/test/test_log2gnumeric.py (new file in this commit).
# Tests for the log-extraction helpers of log2gnumeric.
from pypy.tool.memusage import log2gnumeric

# NOTE(review): the scrape this was recovered from collapses runs of spaces,
# so any column padding inside the fixture is shown as a single space --
# confirm against the repository before relying on exact spacing.
log = """
[1000] ...
[2000] {gc-collect

.----------- Full collection ------------------
| used before collection:
| in ArenaCollection: 500 bytes
| raw_malloced: 100 bytes
| used after collection:
| in ArenaCollection: 300 bytes
| raw_malloced: 50 bytes
| number of major collects: 1
`----------------------------------------------
[3000] gc-collect}
[4000] {gc-collect

.----------- Full collection ------------------
| used before collection:
| in ArenaCollection: 600 bytes
| raw_malloced: 200 bytes
| used after collection:
| in ArenaCollection: 400 bytes
| raw_malloced: 100 bytes
| number of major collects: 1
`----------------------------------------------
[5000] gc-collect}
...
...
[6000] {translation-task
starting annotate
...
...
[7000] translation-task}
[8000] {translation-task
starting rtype_lltype
...
...
[9000] translation-task}
...
[a000] ...
"""

# the extraction functions work on the log with its newlines removed
log = log.replace('\n', '')

def test_get_clock_range():
    """The range spans the smallest and the largest timestamp of the log."""
    assert log2gnumeric.get_clock_range(log) == (0x1000, 0xa000)


def test_gc_collect_rows():
    """One header row plus one row per full collection."""
    assert list(log2gnumeric.gc_collect_rows(0x1000, log)) == [
        (      'clock', 'gc-before', 'gc-after'),
        (0x3000-0x1000,     500+100,   300+ 50),
        (0x5000-0x1000,     600+200,   400+100),
    ]

def test_tasks_rows():
    """One header row plus one row per translation task."""
    assert list(log2gnumeric.tasks_rows(0x1000, log)) == [
        (      'clock', None, 'task'),
        (0x6000-0x1000,    1, 'annotate'),
        (0x8000-0x1000,    1, 'rtype_lltype'),
    ]


def test_vmrss_rows():
    """Samples are spread evenly between 0 and the given maximum clock."""
    samples = ['100', '200', '300']
    assert list(log2gnumeric.vmrss_rows_impl(samples, 2000)) == [
        ('inferred clock', 'VmRSS'),
        (0, 100),
        (1000, 200),
        (2000, 300),
    ]

def test_loops_rows():
    """Allocations and frees update the running loop/bridge counters."""
    jit_log = """\
[1000] {jit-mem-looptoken-alloc
allocating Loop # 0
[1001] jit-mem-looptoken-alloc}
[2000] {jit-mem-looptoken-alloc
allocating Loop # 1
[2001] jit-mem-looptoken-alloc}
[3000] {jit-mem-looptoken-alloc
allocating Bridge # 1 of Loop # 0
[3001] jit-mem-looptoken-alloc}
[4000] {jit-mem-looptoken-free
freeing Loop # 0 with 1 attached bridges
[4001]
"""
    jit_log = jit_log.replace('\n', '')
    assert list(log2gnumeric.loops_rows(0x1000, jit_log)) == [
        ('clock', 'total', 'loops', 'bridges'),
        (    0x0,       1,       1,         0),
        ( 0x1000,       2,       2,         0),
        ( 0x2000,       3,       2,         1),
        ( 0x3000,       1,       1,         0),
    ]