about | summary | refs | log | tree | commit | diff
path: root/pypy/tool
diff options
context:
space:
mode:
author: Antonio Cuni <anto.cuni@gmail.com> 2011-05-19 15:04:43 +0200
committer: Antonio Cuni <anto.cuni@gmail.com> 2011-05-19 15:04:43 +0200
commit: 0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60 (patch)
tree: cbe01c33670949b01134907bd37492df2051abfd /pypy/tool
parent: fix for the case in which you don't pass the cpython vmrss (diff)
download: pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.tar.gz
pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.tar.bz2
pypy-0caa89d4fc28cbf26b5b5661e7cbd0a3dbb58c60.zip
move this tool to its own directory; it does not really belong to the JIT
Diffstat (limited to 'pypy/tool')
-rw-r--r-- pypy/tool/memusage/__init__.py 0
-rw-r--r-- pypy/tool/memusage/log-template.gnumeric bin 0 -> 1176364 bytes
-rw-r--r-- pypy/tool/memusage/log2gnumeric.py 180
-rw-r--r-- pypy/tool/memusage/test/test_log2gnumeric.py 99
4 files changed, 279 insertions, 0 deletions
diff --git a/pypy/tool/memusage/__init__.py b/pypy/tool/memusage/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/pypy/tool/memusage/__init__.py
diff --git a/pypy/tool/memusage/log-template.gnumeric b/pypy/tool/memusage/log-template.gnumeric
new file mode 100644
index 0000000000..b9fc1685fc
--- /dev/null
+++ b/pypy/tool/memusage/log-template.gnumeric
Binary files differ
diff --git a/pypy/tool/memusage/log2gnumeric.py b/pypy/tool/memusage/log2gnumeric.py
new file mode 100644
index 0000000000..d8bbd72184
--- /dev/null
+++ b/pypy/tool/memusage/log2gnumeric.py
@@ -0,0 +1,180 @@
+#! /usr/bin/env python
+"""
+Usage: log2gnumeric logfile [options]
+
+Produces a logfile.gnumeric file which contains the data extracted from the
+logfile generated with the PYPYLOG env variable.
+
+Currently, it reads the translation-task, gc-collect and
+jit-mem-looptoken-alloc/free categories of the log, plus the memory samples
+stored in logfile.vmrss and, optionally, in the file given by --cpython-vmrss.
+
+You can freely edit the graph in log-template.gnumeric: this script will
+create a new file replacing the 'translation-task', 'gc-collect', 'loops',
+'vmrss' and 'cpython-vmrss' sheets.
+"""
+
+import re, sys
+import gzip
+import optparse
+
+
+def main(logname, options):
+    """
+    Convert the PYPYLOG file ``logname`` into ``logname + '.gnumeric'``.
+
+    ``options`` is the result of the optparse parsing; only its
+    ``cpython_vmrss`` attribute (name of the CPython .vmrss file, or None)
+    is read.  The template 'log-template.gnumeric' is opened relative to the
+    current working directory.
+    """
+    # NOTE: use the logname we were given.  Re-reading sys.argv[1] here would
+    # pick an option instead of the log file as soon as the options are
+    # passed before the positional argument.
+    outname = logname + '.gnumeric'
+    logfile = open(logname)
+    try:
+        data = logfile.read()
+    finally:
+        logfile.close()
+    # the regexps used to extract the data are written for a log without
+    # line breaks
+    data = data.replace('\n', '')
+    minclock, maxclock = get_clock_range(data)
+    time0 = minclock # we want "relative clocks"
+    maxtime = maxclock-time0
+    #
+    template = gzip.open('log-template.gnumeric')
+    try:
+        xml = template.read()
+    finally:
+        template.close()
+    xml = replace_sheet(xml, 'translation-task', tasks_rows(time0, data))
+    xml = replace_sheet(xml, 'gc-collect', gc_collect_rows(time0, data))
+    xml = replace_sheet(xml, 'loops', loops_rows(time0, data))
+    xml = replace_sheet(xml, 'vmrss', vmrss_rows(logname + '.vmrss', maxtime))
+    xml = replace_sheet(xml, 'cpython-vmrss', vmrss_rows(options.cpython_vmrss, maxtime))
+    #
+    out = gzip.open(outname, 'wb')
+    try:
+        out.write(xml)
+    finally:
+        # make sure the gzip trailer is written even if write() fails
+        out.close()
+
+
+# ========================================================================
+# functions to manipulate gnumeric files
+# ========================================================================
+
+def replace_sheet(xml, sheet_name, data):
+    """
+    Return a copy of ``xml`` in which the <gnm:Cells> element of the sheet
+    called ``sheet_name`` is replaced by the cells generated from ``data``
+    (an iterable of rows, see gen_cells).
+
+    If the sheet cannot be found, a message is printed and ``xml`` is
+    returned unchanged.
+    """
+    pattern = '<gnm:Sheet .*?<gnm:Name>%s</gnm:Name>.*?(<gnm:Cells>.*?</gnm:Cells>)'
+    # escape the name, so that it is matched literally even if it contains
+    # characters which are special for the regexp engine
+    regex = re.compile(pattern % re.escape(sheet_name), re.DOTALL)
+    cells = gen_cells(data)
+    match = regex.search(xml)
+    if not match:
+        print('Cannot find sheet %s' % sheet_name)
+        return xml
+    # splice by position instead of using re.sub, so that the new cells are
+    # never interpreted as a replacement template
+    a, b = match.span(1)
+    return xml[:a] + cells + xml[b:]
+
+def gen_cells(data):
+    """
+    Return the text of a <gnm:Cells> element containing one <gnm:Cell> for
+    each value of each row of ``data``.
+
+    ``data`` is an iterable of rows, each row being an iterable of values:
+    None produces an empty cell, numbers produce numeric cells and everything
+    else is formatted with %s and stored as a string.
+    """
+    from xml.sax.saxutils import escape
+    # values for the ValueType attribute
+    ValueType_Empty = 'ValueType="10"'
+    ValueType_Number = 'ValueType="40"'
+    ValueType_String = 'ValueType="60"'
+    #
+    try:
+        number_types = (int, long, float)
+    except NameError:
+        # no separate 'long' type on this interpreter
+        number_types = (int, float)
+    #
+    cell = ' <gnm:Cell Row="%d" Col="%d" %s>%s</gnm:Cell>'
+    parts = ['<gnm:Cells>']
+    for i, row in enumerate(data):
+        for j, val in enumerate(row):
+            if val is None:
+                attr = ValueType_Empty
+                val = ''
+            elif isinstance(val, number_types):
+                attr = ValueType_Number
+            else:
+                attr = ValueType_String
+                # the text ends up inside an XML element: '&', '<' and '>'
+                # must be escaped or the resulting file is not well-formed
+                val = escape('%s' % (val,))
+            parts.append(cell % (i, j, attr, val))
+    parts.append(' </gnm:Cells>')
+    return '\n'.join(parts)
+
+
+# ========================================================================
+# functions to extract various data from the logs
+# ========================================================================
+
+# divisor applied to every timestamp read from the log; it is rebound to a
+# larger value when this module is run as a script
+CLOCK_FACTOR = 1
+def read_clock(x):
+    """
+    Parse ``x``, a timestamp written in hexadecimal, and return its value
+    divided by CLOCK_FACTOR.
+    """
+    return int(x, 16) / CLOCK_FACTOR
+
+def get_clock_range(data):
+    """
+    Return the pair (smallest, largest) of the timestamps found in ``data``.
+
+    A timestamp is a lowercase hexadecimal number enclosed in square
+    brackets and followed by a space.
+    """
+    timestamp_re = re.compile(r"\[([0-9a-f]+)\] ")
+    clocks = [read_clock(found.group(1))
+              for found in timestamp_re.finditer(data)]
+    return min(clocks), max(clocks)
+
+def gc_collect_rows(time0, data):
+    """
+    Yield the rows of the 'gc-collect' sheet: first the header, then for
+    each full collection found in ``data`` the tuple
+    (clock relative to time0, bytes used before, bytes used after), where
+    the bytes are the sum of the ArenaCollection and raw_malloced figures.
+    """
+    pattern = r"""
+----------- Full collection ------------------
+\| used before collection:
+\| in ArenaCollection: (\d+) bytes
+\| raw_malloced: (\d+) bytes
+\| used after collection:
+\| in ArenaCollection: (\d+) bytes
+\| raw_malloced: (\d+) bytes
+\| number of major collects: (\d+)
+`----------------------------------------------
+\[([0-9a-f]+)\] gc-collect\}"""
+    #
+    # the pattern is split on several lines only for readability
+    regex = re.compile(pattern.replace('\n', ''))
+    yield 'clock', 'gc-before', 'gc-after'
+    for found in regex.finditer(data):
+        (arena_before, raw_before,
+         arena_after, raw_after, _collects, stamp) = found.groups()
+        used_before = int(arena_before) + int(raw_before)
+        used_after = int(arena_after) + int(raw_after)
+        yield read_clock(stamp) - time0, used_before, used_after
+
+def tasks_rows(time0, data):
+    """
+    Yield the rows of the 'translation-task' sheet: first the header, then
+    the tuple (clock relative to time0, 1, task name) for each
+    translation-task section found in ``data``.
+    """
+    pattern = r"""
+\[([0-9a-f]+)\] \{translation-task
+starting ([\w-]+)
+"""
+    #
+    # the pattern is split on several lines only for readability
+    regex = re.compile(pattern.replace('\n', ''))
+    yield 'clock', None, 'task'
+    for found in regex.finditer(data):
+        stamp, task = found.groups()
+        yield read_clock(stamp) - time0, 1, task
+
+
+def loops_rows(time0, data):
+    """
+    Yield the rows of the 'loops' sheet: first the header, then for each
+    jit-mem-looptoken-alloc/free section found in ``data`` the tuple
+    (clock relative to time0, loops+bridges, loops, bridges) with the
+    running counts after that section.
+    """
+    s = r"""
+\[([0-9a-f]+)\] \{jit-mem-looptoken-(alloc|free)
+(.*?)\[
+"""
+    #
+    r = re.compile(s.replace('\n', ''))
+    # compiled once here instead of once per 'freeing Loop' section; at least
+    # one digit is required, so that int() below cannot get an empty string
+    freeing = re.compile('freeing Loop # .* with ([0-9]+) attached bridges')
+    yield 'clock', 'total', 'loops', 'bridges'
+    loops = 0
+    bridges = 0
+    for clock, _action, text in r.findall(data):
+        clock = read_clock(clock) - time0
+        if text.startswith('allocating Loop #'):
+            loops += 1
+        elif text.startswith('allocating Bridge #'):
+            bridges += 1
+        elif text.startswith('freeing Loop #'):
+            loops -= 1
+            match = freeing.match(text)
+            if match:
+                # the section tells how many bridges go away with the loop;
+                # if that part is missing, only the loop is accounted for
+                bridges -= int(match.group(1))
+        yield clock, loops+bridges, loops, bridges
+
+
+def vmrss_rows(filename, maxtime):
+    """
+    Yield the rows of a vmrss sheet built from the lines of ``filename``.
+
+    If ``filename`` is empty or None, or if the file cannot be read (in that
+    case a warning is printed), only the header row is produced.
+    """
+    lines = []
+    if filename:
+        try:
+            f = open(filename)
+            try:
+                lines = f.readlines()
+            finally:
+                f.close()
+        except IOError:
+            # the name of the file was never interpolated in the message
+            print('Warning: cannot find file %s, skipping this sheet' % filename)
+    for row in vmrss_rows_impl(lines, maxtime):
+        yield row
+
+def vmrss_rows_impl(lines, maxtime):
+    """
+    Yield the header row, then one (clock, memory) row for each sample in
+    ``lines``.
+
+    Each non-blank line must contain an integer.  The samples carry no
+    timestamp, so the clock is inferred by spreading them evenly between 0
+    and ``maxtime``.
+    """
+    yield 'inferred clock', 'VmRSS'
+    # blank lines (e.g. at the end of the file) are not samples
+    samples = [int(line) for line in lines if line.strip()]
+    # number of intervals between the samples; with a single sample there
+    # is none, and we must not divide by zero
+    intervals = max(len(samples) - 1, 1)
+    for i, mem in enumerate(samples):
+        yield maxtime * i // intervals, mem
+
+
+if __name__ == '__main__':
+    # when run as a script, report GigaTicks instead of Ticks
+    CLOCK_FACTOR = 1000000000.0
+    parser = optparse.OptionParser(usage="%prog logfile [options]")
+    parser.add_option(
+        '-c', '--cpython-vmrss',
+        dest='cpython_vmrss', default=None, metavar='FILE', type=str,
+        help='the .vmrss file produced by CPython')
+    options, args = parser.parse_args()
+    # exactly one positional argument is accepted: the log file
+    if len(args) == 1:
+        main(args[0], options)
+    else:
+        parser.print_help()
+        sys.exit(2)
diff --git a/pypy/tool/memusage/test/test_log2gnumeric.py b/pypy/tool/memusage/test/test_log2gnumeric.py
new file mode 100644
index 0000000000..42beea8164
--- /dev/null
+++ b/pypy/tool/memusage/test/test_log2gnumeric.py
@@ -0,0 +1,99 @@
+from pypy.tool.memusage import log2gnumeric
+
+# Sample log shared by the tests below: two gc-collect sections followed by
+# two translation-task sections; the timestamps go from 0x1000 to 0xa000.
+log = """
+[1000] ...
+[2000] {gc-collect
+
+.----------- Full collection ------------------
+| used before collection:
+| in ArenaCollection: 500 bytes
+| raw_malloced: 100 bytes
+| used after collection:
+| in ArenaCollection: 300 bytes
+| raw_malloced: 50 bytes
+| number of major collects: 1
+`----------------------------------------------
+[3000] gc-collect}
+[4000] {gc-collect
+
+.----------- Full collection ------------------
+| used before collection:
+| in ArenaCollection: 600 bytes
+| raw_malloced: 200 bytes
+| used after collection:
+| in ArenaCollection: 400 bytes
+| raw_malloced: 100 bytes
+| number of major collects: 1
+`----------------------------------------------
+[5000] gc-collect}
+...
+...
+[6000] {translation-task
+starting annotate
+...
+...
+[7000] translation-task}
+[8000] {translation-task
+starting rtype_lltype
+...
+...
+[9000] translation-task}
+...
+[a000] ...
+"""
+
+# the functions under test are called on a log without line breaks
+log = log.replace('\n', '')
+
+def test_get_clock_range():
+    # the smallest and the largest timestamps of the sample log
+    clock_range = log2gnumeric.get_clock_range(log)
+    assert clock_range == (0x1000, 0xa000)
+
+
+def test_gc_collect_rows():
+    # header, then one row for each of the two collections in the sample log
+    expected = [
+        ('clock', 'gc-before', 'gc-after'),
+        (0x3000-0x1000, 500+100, 300+50),
+        (0x5000-0x1000, 600+200, 400+100),
+    ]
+    rows = list(log2gnumeric.gc_collect_rows(0x1000, log))
+    assert rows == expected
+
+def test_tasks_rows():
+    # header, then one row for each of the two tasks in the sample log
+    expected = [
+        ('clock', None, 'task'),
+        (0x6000-0x1000, 1, 'annotate'),
+        (0x8000-0x1000, 1, 'rtype_lltype'),
+    ]
+    rows = list(log2gnumeric.tasks_rows(0x1000, log))
+    assert rows == expected
+
+
+def test_vmrss_rows():
+    # three samples spread evenly over the interval [0, 2000]
+    samples = ['100', '200', '300']
+    expected = [
+        ('inferred clock', 'VmRSS'),
+        (0, 100),
+        (1000, 200),
+        (2000, 300),
+    ]
+    rows = list(log2gnumeric.vmrss_rows_impl(samples, 2000))
+    assert rows == expected
+
+def test_loops_rows():
+    # two loops and one bridge are allocated, then loop #0 is freed together
+    # with its attached bridge
+    log = """\
+[1000] {jit-mem-looptoken-alloc
+allocating Loop # 0
+[1001] jit-mem-looptoken-alloc}
+[2000] {jit-mem-looptoken-alloc
+allocating Loop # 1
+[2001] jit-mem-looptoken-alloc}
+[3000] {jit-mem-looptoken-alloc
+allocating Bridge # 1 of Loop # 0
+[3001] jit-mem-looptoken-alloc}
+[4000] {jit-mem-looptoken-free
+freeing Loop # 0 with 1 attached bridges
+[4001]
+"""
+    log = log.replace('\n', '')
+    expected = [
+        ('clock', 'total', 'loops', 'bridges'),
+        (0x0, 1, 1, 0),
+        (0x1000, 2, 2, 0),
+        (0x2000, 3, 2, 1),
+        (0x3000, 1, 1, 0),
+    ]
+    rows = list(log2gnumeric.loops_rows(0x1000, log))
+    assert rows == expected