move this tool to its own directory; it does not really belong to the JIT

author: Antonio Cuni <anto.cuni@gmail.com> 2011-05-19 15:04:43 +0200
committer: Antonio Cuni <anto.cuni@gmail.com> 2011-05-19 15:04:43 +0200
commit: 0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60 (patch)
tree: cbe01c33670949b01134907bd37492df2051abfd /pypy/tool
parent: fix for the case in which you don't pass the cpython vmrss (diff)
download: pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.tar.gz
pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.tar.bz2
pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.zip
4 files changed, 279 insertions, 0 deletions
diff --git a/pypy/tool/memusage/__init__.py b/pypy/tool/memusage/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/pypy/tool/memusage/__init__.py
diff --git a/pypy/tool/memusage/log-template.gnumeric b/pypy/tool/memusage/log-template.gnumeric
new file mode 100644
index 0000000000..b9fc1685fc
--- /dev/null
+++ b/pypy/tool/memusage/log-template.gnumeric
diff --git a/pypy/tool/memusage/log2gnumeric.py b/pypy/tool/memusage/log2gnumeric.py
new file mode 100644
index 0000000000..d8bbd72184
--- /dev/null
+++ b/pypy/tool/memusage/log2gnumeric.py
@@ -0,0 +1,180 @@
+#! /usr/bin/env python
+"""
+Usage: log2gnumeric logfile
+
+Produces a logfile.gnumeric file which contains the data extracted from the
+logfile generated with the PYPYLOG env variable.
+
+Currently, it expects the log to contain the translation-task and gc-collect
+categories (jit-mem-looptoken-* entries are used too, when present).
+
+You can freely edit the graph in log-template.gnumeric: this script will create a
+new file replacing the data sheets (translation-task, gc-collect, loops, vmrss, cpython-vmrss).
+"""
+
+import re, sys
+import gzip
+import optparse
+
+
+def main(logname, options):
+    """Build `logname`.gnumeric from the PYPYLOG file `logname` and its .vmrss companions."""
+    outname = logname + '.gnumeric'
+    data = open(logname).read()
+    data = data.replace('\n', '')  # the regexps below expect a single-line log
+    minclock, maxclock = get_clock_range(data)
+    time0 = minclock # we want "relative clocks"
+    maxtime = maxclock-time0
+    # fill each sheet of the template (looked up in the current directory)
+    xml = gzip.open('log-template.gnumeric').read()
+    xml = replace_sheet(xml, 'translation-task', tasks_rows(time0, data))
+    xml = replace_sheet(xml, 'gc-collect', gc_collect_rows(time0, data))
+    xml = replace_sheet(xml, 'loops', loops_rows(time0, data))
+    xml = replace_sheet(xml, 'vmrss', vmrss_rows(logname + '.vmrss', maxtime))
+    xml = replace_sheet(xml, 'cpython-vmrss', vmrss_rows(options.cpython_vmrss, maxtime))
+    # write the result gzip-compressed, like the template
+    out = gzip.open(outname, 'wb')
+    out.write(xml)
+    out.close()
+
+
+# ========================================================================
+# functions to manipulate gnumeric files
+# ========================================================================
+
+def replace_sheet(xml, sheet_name, data):
+    """Return `xml` with the <gnm:Cells> element of sheet `sheet_name` rebuilt from `data`."""
+    template = '<gnm:Sheet .*?<gnm:Name>%s</gnm:Name>.*?(<gnm:Cells>.*?</gnm:Cells>)'
+    sheet_re = re.compile(template % sheet_name, re.DOTALL)
+    new_cells = gen_cells(data)
+    found = sheet_re.search(xml)
+    if found is None:
+        print 'Cannot find sheet %s' % sheet_name
+        return xml
+    start, stop = found.span(1)
+    return xml[:start] + new_cells + xml[stop:]
+
+def gen_cells(data):  # build the <gnm:Cells> XML text for an iterable of rows
+    # values for the ValueType attribute
+    ValueType_Empty  = 'ValueType="10"'
+    ValueType_Number = 'ValueType="40"'
+    ValueType_String = 'ValueType="60"'
+    # one <gnm:Cell> per value: i is the row index, j the column index
+    parts = []
+    parts.append('<gnm:Cells>')
+    for i, row in enumerate(data):
+        for j, val in enumerate(row):
+            if val is None:
+                attr = ValueType_Empty
+                val = ''
+            elif isinstance(val, (int, long, float)):
+                attr = ValueType_Number
+            else:
+                attr = ValueType_String
+            cell = '        <gnm:Cell Row="%d" Col="%d" %s>%s</gnm:Cell>'
+            parts.append(cell % (i, j, attr, val))  # NOTE(review): val is inserted without XML escaping
+    parts.append('      </gnm:Cells>')
+    return '\n'.join(parts)
+    
+
+# ========================================================================
+# functions to extract various data from the logs
+# ========================================================================
+
+CLOCK_FACTOR = 1  # divisor applied to raw ticks; the __main__ section overrides it
+def read_clock(x):
+    """Parse the hexadecimal timestamp string `x` and scale it by CLOCK_FACTOR."""
+    return int(x, 16) / CLOCK_FACTOR
+
+def get_clock_range(data):
+    """Return (lowest, highest) of all '[hex] ' timestamps found in `data`."""
+    stamp_re = re.compile(r"\[([0-9a-f]+)\] ")
+    ticks = [read_clock(hexval) for hexval in stamp_re.findall(data)]
+    return min(ticks), max(ticks)
+
+def gc_collect_rows(time0, data):  # one row per "Full collection" report found in data
+    s = r"""
+----------- Full collection ------------------
+\| used before collection:
+\|          in ArenaCollection:      (\d+) bytes
+\|          raw_malloced:            (\d+) bytes
+\| used after collection:
+\|          in ArenaCollection:      (\d+) bytes
+\|          raw_malloced:            (\d+) bytes
+\| number of major collects:         (\d+)
+`----------------------------------------------
+\[([0-9a-f]+)\] gc-collect\}"""
+    # newlines are dropped from the pattern; main() drops them from the log as well
+    r = re.compile(s.replace('\n', ''))
+    yield 'clock', 'gc-before', 'gc-after'
+    for a,b,c,d,e,f in r.findall(data):  # e (number of major collects) is not used
+        clock = read_clock(f) - time0  # f is the timestamp that closes the section
+        yield clock, int(a)+int(b), int(c)+int(d)  # arena + raw_malloced, before and after
+
+def tasks_rows(time0, data):  # one row per "starting <task>" entry of a translation-task section
+    s = r"""
+\[([0-9a-f]+)\] \{translation-task
+starting ([\w-]+)
+"""
+    # newlines are dropped from the pattern; main() drops them from the log as well
+    r = re.compile(s.replace('\n', ''))
+    yield 'clock', None, 'task'
+    for a,b in r.findall(data):
+        clock = read_clock(a) - time0  # a is the timestamp that opens the section
+        yield clock, 1, b  # the middle column is always 1
+
+
+def loops_rows(time0, data):
+    """Yield a header, then (clock, total, loops, bridges) after each looptoken alloc/free."""
+    s = r"""
+\[([0-9a-f]+)\] \{jit-mem-looptoken-(alloc|free)
+(.*?)\[
+"""
+    # newlines are dropped from the pattern; main() drops them from the log as well
+    r = re.compile(s.replace('\n', ''))
+    yield 'clock', 'total', 'loops', 'bridges'
+    loops = 0
+    bridges = 0
+    for clock, action, text in r.findall(data):
+        clock = read_clock(clock) - time0
+        if text.startswith('allocating Loop #'):
+            loops += 1
+        elif text.startswith('allocating Bridge #'):
+            bridges += 1
+        elif text.startswith('freeing Loop #'):
+            match = re.match('freeing Loop # .* with ([0-9]+) attached bridges', text)
+            loops -= 1
+            if match is not None:  # no bridge count in the record: nothing to subtract
+                bridges -= int(match.group(1))
+        yield clock, loops+bridges, loops, bridges
+
+
+def vmrss_rows(filename, maxtime):
+    lines = []
+    if filename:  # None/empty when no vmrss file was given: only the header row is produced
+        try:
+            lines = open(filename).readlines()
+        except IOError:
+            print 'Warning: cannot find file %s, skipping this sheet' % filename
+    for row in vmrss_rows_impl(lines, maxtime):
+        yield row
+
+def vmrss_rows_impl(lines, maxtime):
+    yield 'inferred clock', 'VmRSS'
+    # samples are spread evenly over [0, maxtime]; a lone sample is placed at clock 0
+    steps = max(len(lines) - 1, 1)  # never 0, so one sample cannot divide by zero
+    for i, line in enumerate(lines):
+        clock = maxtime * i // steps
+        yield clock, int(line)
+
+
+if __name__ == '__main__':
+    CLOCK_FACTOR = 1000000000.0 # report GigaTicks instead of Ticks
+    parser = optparse.OptionParser(usage="%prog logfile [options]")
+    parser.add_option('-c', '--cpython-vmrss', dest='cpython_vmrss', default=None, metavar='FILE', type=str,
+                      help='the .vmrss file produced by CPython')
+    options, args = parser.parse_args()
+    if len(args) != 1:  # exactly one positional argument, the logfile, is expected
+        parser.print_help()
+        sys.exit(2)
+    main(args[0], options)
diff --git a/pypy/tool/memusage/test/test_log2gnumeric.py b/pypy/tool/memusage/test/test_log2gnumeric.py
new file mode 100644
index 0000000000..42beea8164
--- /dev/null
+++ b/pypy/tool/memusage/test/test_log2gnumeric.py
@@ -0,0 +1,99 @@
+from pypy.tool.memusage import log2gnumeric
+
+log = """
+[1000] ...
+[2000] {gc-collect
+
+.----------- Full collection ------------------
+| used before collection:
+|          in ArenaCollection:      500 bytes
+|          raw_malloced:            100 bytes
+| used after collection:
+|          in ArenaCollection:      300 bytes
+|          raw_malloced:            50 bytes
+| number of major collects:         1
+`----------------------------------------------
+[3000] gc-collect}
+[4000] {gc-collect
+
+.----------- Full collection ------------------
+| used before collection:
+|          in ArenaCollection:      600 bytes
+|          raw_malloced:            200 bytes
+| used after collection:
+|          in ArenaCollection:      400 bytes
+|          raw_malloced:            100 bytes
+| number of major collects:         1
+`----------------------------------------------
+[5000] gc-collect}
+...
+...
+[6000] {translation-task
+starting annotate
+...
+...
+[7000] translation-task}
+[8000] {translation-task
+starting rtype_lltype
+...
+...
+[9000] translation-task}
+...
+[a000] ...
+"""
+
+log = log.replace('\n', '')  # log2gnumeric.main() strips newlines before parsing, so the fixture does too
+
+def test_get_clock_range():
+    minclock, maxclock = log2gnumeric.get_clock_range(log)
+    assert minclock == 0x1000  # timestamps in the log are hexadecimal
+    assert maxclock == 0xa000
+    
+
+def test_gc_collect_rows():  # header row, then one row per gc-collect section stamped with its closing timestamp
+    rows = list(log2gnumeric.gc_collect_rows(0x1000, log))
+    assert len(rows) == 3
+    assert rows[0] == (      'clock', 'gc-before', 'gc-after')
+    assert rows[1] == (0x3000-0x1000,     500+100,    300+ 50)
+    assert rows[2] == (0x5000-0x1000,     600+200,    400+100)
+    
+def test_tasks_rows():  # header row, then one row per translation-task section stamped with its opening timestamp
+    rows = list(log2gnumeric.tasks_rows(0x1000, log))
+    assert len(rows) == 3
+    assert rows[0] == (      'clock', None, 'task')
+    assert rows[1] == (0x6000-0x1000,    1, 'annotate')
+    assert rows[2] == (0x8000-0x1000,    1, 'rtype_lltype')
+
+
+def test_vmrss_rows():  # three samples end up evenly spaced between clock 0 and maxtime=2000
+    lines = ['100', '200', '300']
+    rows = list(log2gnumeric.vmrss_rows_impl(lines, 2000))
+    assert len(rows) == 4
+    assert rows[0] == ('inferred clock', 'VmRSS')
+    assert rows[1] == (0, 100)
+    assert rows[2] == (1000, 200)
+    assert rows[3] == (2000, 300)
+
+def test_loops_rows():  # two loops and one bridge are allocated, then Loop 0 is freed
+    log = """\
+[1000] {jit-mem-looptoken-alloc
+allocating Loop # 0
+[1001] jit-mem-looptoken-alloc}
+[2000] {jit-mem-looptoken-alloc
+allocating Loop # 1
+[2001] jit-mem-looptoken-alloc}
+[3000] {jit-mem-looptoken-alloc
+allocating Bridge # 1 of Loop # 0
+[3001] jit-mem-looptoken-alloc}
+[4000] {jit-mem-looptoken-free
+freeing Loop # 0 with 1 attached bridges
+[4001]
+"""
+    log = log.replace('\n', '')
+    rows = list(log2gnumeric.loops_rows(0x1000, log))
+    assert len(rows) == 5
+    assert rows[0] == ('clock', 'total', 'loops', 'bridges')
+    assert rows[1] == (    0x0,       1,       1,         0)
+    assert rows[2] == ( 0x1000,       2,       2,         0)
+    assert rows[3] == ( 0x2000,       3,       2,         1)
+    assert rows[4] == ( 0x3000,       1,       1,         0)  # freeing Loop 0 also removes its 1 attached bridge
author	Antonio Cuni <anto.cuni@gmail.com>	2011-05-19 15:04:43 +0200
committer	Antonio Cuni <anto.cuni@gmail.com>	2011-05-19 15:04:43 +0200
commit	0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60 (patch)
tree	cbe01c33670949b01134907bd37492df2051abfd /pypy/tool
parent	fix for the case in which you don't pass the cpython vmrss (diff)
download	pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.tar.gz pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.tar.bz2 pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.zip