From 1662be376465c22461f03ef01dad16cef70f746a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 20 Jul 2008 23:32:00 +0900 Subject: win32kprof: Consider the section alignment when estimating the image base. --- bin/win32kprof.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'bin/win32kprof.py') diff --git a/bin/win32kprof.py b/bin/win32kprof.py index b4f9ce95dc..1876fbc067 100755 --- a/bin/win32kprof.py +++ b/bin/win32kprof.py @@ -35,8 +35,6 @@ import struct __version__ = '0.1' -verbose = False - class ParseError(Exception): pass @@ -211,7 +209,7 @@ class Profile: if self.base_addr is None: ref_addr = self.lookup_symbol('__debug_profile_reference2') if ref_addr: - self.base_addr = addr - ref_addr + self.base_addr = (addr - ref_addr) & ~(options.align - 1) else: self.base_addr = 0 #print hex(self.base_addr) @@ -224,27 +222,27 @@ class Profile: delta += stamp - last_stamp if not exit: - if verbose >= 2: + if options.verbose >= 2: print "%10u >> 0x%08x" % (stamp, addr) - if verbose: + if options.verbose: print "%10u >> %s" % (stamp, name) delta -= caller_overhead stack.append((name, stamp, delta)) delta = 0 else: - if verbose >= 2: + if options.verbose >= 2: print "%10u << 0x%08x" % (stamp, addr) if len(stack): self_time = delta - callee_overhead entry_name, entry_stamp, delta = stack.pop() if entry_name != name: - if verbose: + if options.verbose: print "%10u << %s" % (stamp, name) #assert entry_name == name break total_time = stamp - entry_stamp self.functions[entry_name] = self.functions.get(entry_name, 0) + self_time - if verbose: + if options.verbose: print "%10u << %s %+u" % (stamp, name, self_time) else: delta = 0 @@ -264,6 +262,10 @@ def main(): parser = optparse.OptionParser( usage="\n\t%prog [options] [file] ...", version="%%prog %s" % __version__) + parser.add_option( + '-a', '--align', metavar='NUMBER', + type="int", dest="align", default=16, + help="section alignment") parser.add_option( '-m', '--map', metavar='FILE', type="string", dest="map", @@ -277,10 +279,9 @@ def main(): action="count", dest="verbose", default=0, help="verbose output") - (options, args) = parser.parse_args(sys.argv[1:]) - global verbose - verbose = options.verbose + global options + (options, args) = parser.parse_args(sys.argv[1:]) profile = Profile() if options.base is not None: -- cgit v1.2.3 From ac25408c4773bb75cf6926251e01095ce7928fc0 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 21 Jul 2008 13:02:07 +0900 Subject: win32kprof: Convert tabs to spaces. --- bin/win32kprof.py | 482 +++++++++++++++++++++++++++--------------------------- 1 file changed, 241 insertions(+), 241 deletions(-) (limited to 'bin/win32kprof.py') diff --git a/bin/win32kprof.py b/bin/win32kprof.py index 1876fbc067..c1aabc48c9 100755 --- a/bin/win32kprof.py +++ b/bin/win32kprof.py @@ -37,262 +37,262 @@ __version__ = '0.1' class ParseError(Exception): - pass + pass class MsvcDemangler: - # http://www.kegel.com/mangle.html - - def __init__(self, symbol): - self._symbol = symbol - self._pos = 0 - - def lookahead(self): - return self._symbol[self._pos] - - def consume(self): - ret = self.lookahead() - self._pos += 1 - return ret - - def match(self, c): - if self.lookahead() != c: - raise ParseError - self.consume() - - def parse(self): - self.match('?') - name = self.parse_name() - qualifications = self.parse_qualifications() - return '::'.join(qualifications + [name]) - - def parse_name(self): - if self.lookahead() == '?': - return self.consume() + self.consume() - else: - name = self.parse_id() - self.match('@') - return name - - def parse_qualifications(self): - qualifications = [] - while self.lookahead() != '@': - name = self.parse_id() - qualifications.append(name) - self.match('@') - return qualifications - - def parse_id(self): - s = '' - while True: - c = self.lookahead() - if c.isalnum() or c in '_': - s += c - self.consume() - else: - break - return s + # http://www.kegel.com/mangle.html + + def __init__(self, symbol): + self._symbol = symbol + self._pos = 0 + + def lookahead(self): + return self._symbol[self._pos] + + def consume(self): + ret = self.lookahead() + self._pos += 1 + return ret + + def match(self, c): + if self.lookahead() != c: + raise ParseError + self.consume() + + def parse(self): + self.match('?') + name = self.parse_name() + qualifications = self.parse_qualifications() + return '::'.join(qualifications + [name]) + + def parse_name(self): + if self.lookahead() == '?': + return self.consume() + self.consume() + else: + name = self.parse_id() + self.match('@') + return name + + def parse_qualifications(self): + qualifications = [] + while self.lookahead() != '@': + name = self.parse_id() + qualifications.append(name) + self.match('@') + return qualifications + + def parse_id(self): + s = '' + while True: + c = self.lookahead() + if c.isalnum() or c in '_': + s += c + self.consume() + else: + break + return s def demangle(name): - if name.startswith('_'): - name = name[1:] - idx = name.rfind('@') - if idx != -1 and name[idx+1:].isdigit(): - name = name[:idx] - return name - if name.startswith('?'): - demangler = MsvcDemangler(name) - return demangler.parse() + if name.startswith('_'): + name = name[1:] + idx = name.rfind('@') + if idx != -1 and name[idx+1:].isdigit(): + name = name[:idx] + return name + if name.startswith('?'): + demangler = MsvcDemangler(name) + return demangler.parse() - return name - return name + return name + return name class Profile: - def __init__(self): - self.symbols = [] - self.symbol_cache = {} - self.base_addr = None - self.functions = {} - self.last_stamp = 0 - self.stamp_base = 0 - - def unwrap_stamp(self, stamp): - if stamp < self.last_stamp: - self.stamp_base += 1 << 32 - self.last_stamp = stamp - return self.stamp_base + stamp - - def read_map(self, mapfile): - # See http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx - last_addr = 0 - last_name = 0 - for line in file(mapfile, "rt"): - fields = line.split() - try: - section_offset, name, addr, type, lib_object = fields - except ValueError: - continue - if type != 'f': - continue - section, offset = section_offset.split(':') - addr = int(offset, 16) - name = demangle(name) - if last_addr == addr: - # TODO: handle collapsed functions - #assert last_name == name - continue - self.symbols.append((addr, name)) - last_addr = addr - last_name = name - - # sort symbols - self.symbols.sort(key = lambda (addr, name): addr) - - def lookup_addr(self, addr): - try: - return self.symbol_cache[addr] - except KeyError: - pass - - tolerance = 4196 - s, e = 0, len(self.symbols) - while s != e: - i = (s + e)//2 - start_addr, name = self.symbols[i] - try: - end_addr, next_name = self.symbols[i + 1] - except IndexError: - end_addr = start_addr + tolerance - if addr < start_addr: - e = i - continue - if addr == end_addr: - return next_name - if addr > end_addr: - s = i - continue - return name - return "0x%08x" % addr - - def lookup_symbol(self, name): - for symbol_addr, symbol_name in self.symbols: - if name == symbol_name: - return symbol_addr - return 0 - - def read_data(self, data): - # TODO: compute these automatically - caller_overhead = 672 - 2*144 # __debug_profile_reference2 - 2*__debug_profile_reference1 - callee_overhead = 144 # __debug_profile_reference1 - callee_overhead -= 48 # tolerance - caller_overhead = callee_overhead - - fp = file(data, "rb") - entry_format = "II" - entry_size = struct.calcsize(entry_format) - stack = [] - last_stamp = 0 - delta = 0 - while True: - entry = fp.read(entry_size) - if len(entry) < entry_size: - break - addr_exit, stamp = struct.unpack(entry_format, entry) - if addr_exit == 0 and stamp == 0: - break - addr = addr_exit & 0xfffffffe - exit = addr_exit & 0x00000001 - - if self.base_addr is None: - ref_addr = self.lookup_symbol('__debug_profile_reference2') - if ref_addr: - self.base_addr = (addr - ref_addr) & ~(options.align - 1) - else: - self.base_addr = 0 - #print hex(self.base_addr) - rel_addr = addr - self.base_addr - #print hex(addr - self.base_addr) - - name = self.lookup_addr(rel_addr) - stamp = self.unwrap_stamp(stamp) - - delta += stamp - last_stamp - - if not exit: - if options.verbose >= 2: - print "%10u >> 0x%08x" % (stamp, addr) - if options.verbose: - print "%10u >> %s" % (stamp, name) - delta -= caller_overhead - stack.append((name, stamp, delta)) - delta = 0 - else: - if options.verbose >= 2: - print "%10u << 0x%08x" % (stamp, addr) - if len(stack): - self_time = delta - callee_overhead - entry_name, entry_stamp, delta = stack.pop() - if entry_name != name: - if options.verbose: - print "%10u << %s" % (stamp, name) - #assert entry_name == name - break - total_time = stamp - entry_stamp - self.functions[entry_name] = self.functions.get(entry_name, 0) + self_time - if options.verbose: - print "%10u << %s %+u" % (stamp, name, self_time) - else: - delta = 0 - - last_stamp = stamp - - def write_report(self): - total = sum(self.functions.values()) - results = self.functions.items() - results.sort(key = lambda (name, time): -time) - for name, time in results: - perc = float(time)/float(total)*100.0 - print "%6.03f %s" % (perc, name) + def __init__(self): + self.symbols = [] + self.symbol_cache = {} + self.base_addr = None + self.functions = {} + self.last_stamp = 0 + self.stamp_base = 0 + + def unwrap_stamp(self, stamp): + if stamp < self.last_stamp: + self.stamp_base += 1 << 32 + self.last_stamp = stamp + return self.stamp_base + stamp + + def read_map(self, mapfile): + # See http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx + last_addr = 0 + last_name = 0 + for line in file(mapfile, "rt"): + fields = line.split() + try: + section_offset, name, addr, type, lib_object = fields + except ValueError: + continue + if type != 'f': + continue + section, offset = section_offset.split(':') + addr = int(offset, 16) + name = demangle(name) + if last_addr == addr: + # TODO: handle collapsed functions + #assert last_name == name + continue + self.symbols.append((addr, name)) + last_addr = addr + last_name = name + + # sort symbols + self.symbols.sort(key = lambda (addr, name): addr) + + def lookup_addr(self, addr): + try: + return self.symbol_cache[addr] + except KeyError: + pass + + tolerance = 4196 + s, e = 0, len(self.symbols) + while s != e: + i = (s + e)//2 + start_addr, name = self.symbols[i] + try: + end_addr, next_name = self.symbols[i + 1] + except IndexError: + end_addr = start_addr + tolerance + if addr < start_addr: + e = i + continue + if addr == end_addr: + return next_name + if addr > end_addr: + s = i + continue + return name + return "0x%08x" % addr + + def lookup_symbol(self, name): + for symbol_addr, symbol_name in self.symbols: + if name == symbol_name: + return symbol_addr + return 0 + + def read_data(self, data): + # TODO: compute these automatically + caller_overhead = 672 - 2*144 # __debug_profile_reference2 - 2*__debug_profile_reference1 + callee_overhead = 144 # __debug_profile_reference1 + callee_overhead -= 48 # tolerance + caller_overhead = callee_overhead + + fp = file(data, "rb") + entry_format = "II" + entry_size = struct.calcsize(entry_format) + stack = [] + last_stamp = 0 + delta = 0 + while True: + entry = fp.read(entry_size) + if len(entry) < entry_size: + break + addr_exit, stamp = struct.unpack(entry_format, entry) + if addr_exit == 0 and stamp == 0: + break + addr = addr_exit & 0xfffffffe + exit = addr_exit & 0x00000001 + + if self.base_addr is None: + ref_addr = self.lookup_symbol('__debug_profile_reference2') + if ref_addr: + self.base_addr = (addr - ref_addr) & ~(options.align - 1) + else: + self.base_addr = 0 + #print hex(self.base_addr) + rel_addr = addr - self.base_addr + #print hex(addr - self.base_addr) + + name = self.lookup_addr(rel_addr) + stamp = self.unwrap_stamp(stamp) + + delta += stamp - last_stamp + + if not exit: + if options.verbose >= 2: + print "%10u >> 0x%08x" % (stamp, addr) + if options.verbose: + print "%10u >> %s" % (stamp, name) + delta -= caller_overhead + stack.append((name, stamp, delta)) + delta = 0 + else: + if options.verbose >= 2: + print "%10u << 0x%08x" % (stamp, addr) + if len(stack): + self_time = delta - callee_overhead + entry_name, entry_stamp, delta = stack.pop() + if entry_name != name: + if options.verbose: + print "%10u << %s" % (stamp, name) + #assert entry_name == name + break + total_time = stamp - entry_stamp + self.functions[entry_name] = self.functions.get(entry_name, 0) + self_time + if options.verbose: + print "%10u << %s %+u" % (stamp, name, self_time) + else: + delta = 0 + + last_stamp = stamp + + def write_report(self): + total = sum(self.functions.values()) + results = self.functions.items() + results.sort(key = lambda (name, time): -time) + for name, time in results: + perc = float(time)/float(total)*100.0 + print "%6.03f %s" % (perc, name) def main(): - parser = optparse.OptionParser( - usage="\n\t%prog [options] [file] ...", - version="%%prog %s" % __version__) - parser.add_option( - '-a', '--align', metavar='NUMBER', - type="int", dest="align", default=16, - help="section alignment") - parser.add_option( - '-m', '--map', metavar='FILE', - type="string", dest="map", - help="map file") - parser.add_option( - '-b', '--base', metavar='FILE', - type="string", dest="base", - help="base addr") - parser.add_option( - '-v', '--verbose', - action="count", - dest="verbose", default=0, - help="verbose output") - - global options - (options, args) = parser.parse_args(sys.argv[1:]) - - profile = Profile() - if options.base is not None: - profile.base_addr = int(options.base, 16) - if options.map is not None: - profile.read_map(options.map) - for arg in args: - profile.read_data(arg) - profile.write_report() + parser = optparse.OptionParser( + usage="\n\t%prog [options] [file] ...", + version="%%prog %s" % __version__) + parser.add_option( + '-a', '--align', metavar='NUMBER', + type="int", dest="align", default=16, + help="section alignment") + parser.add_option( + '-m', '--map', metavar='FILE', + type="string", dest="map", + help="map file") + parser.add_option( + '-b', '--base', metavar='FILE', + type="string", dest="base", + help="base addr") + parser.add_option( + '-v', '--verbose', + action="count", + dest="verbose", default=0, + help="verbose output") + + global options + (options, args) = parser.parse_args(sys.argv[1:]) + + profile = Profile() + if options.base is not None: + profile.base_addr = int(options.base, 16) + if options.map is not None: + profile.read_map(options.map) + for arg in args: + profile.read_data(arg) + profile.write_report() if __name__ == '__main__': - main() + main() -- cgit v1.2.3 From 2fafe29793eca081e110276b6e6fbde39f5b90e1 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 21 Jul 2008 19:37:16 +0900 Subject: win32kprof: Generate callgraphs. Relies on gprof2dot.py. --- bin/win32kprof.py | 139 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 91 insertions(+), 48 deletions(-) (limited to 'bin/win32kprof.py') diff --git a/bin/win32kprof.py b/bin/win32kprof.py index c1aabc48c9..4ffa4cc694 100755 --- a/bin/win32kprof.py +++ b/bin/win32kprof.py @@ -32,6 +32,10 @@ import optparse import re import struct +from gprof2dot import Call, Function, Profile +from gprof2dot import CALLS, SAMPLES, TIME, TIME_RATIO, TOTAL_TIME, TOTAL_TIME_RATIO +from gprof2dot import DotWriter, TEMPERATURE_COLORMAP + __version__ = '0.1' @@ -104,18 +108,15 @@ def demangle(name): if name.startswith('?'): demangler = MsvcDemangler(name) return demangler.parse() - - return name return name -class Profile: +class Reader: def __init__(self): self.symbols = [] self.symbol_cache = {} self.base_addr = None - self.functions = {} self.last_stamp = 0 self.stamp_base = 0 @@ -139,7 +140,6 @@ class Profile: continue section, offset = section_offset.split(':') addr = int(offset, 16) - name = demangle(name) if last_addr == addr: # TODO: handle collapsed functions #assert last_name == name @@ -174,8 +174,8 @@ class Profile: if addr > end_addr: s = i continue - return name - return "0x%08x" % addr + return name, addr - start_addr + return "0x%08x" % addr, 0 def lookup_symbol(self, name): for symbol_addr, symbol_name in self.symbols: @@ -184,16 +184,13 @@ class Profile: return 0 def read_data(self, data): - # TODO: compute these automatically - caller_overhead = 672 - 2*144 # __debug_profile_reference2 - 2*__debug_profile_reference1 - callee_overhead = 144 # __debug_profile_reference1 - callee_overhead -= 48 # tolerance - caller_overhead = callee_overhead + profile = Profile() fp = file(data, "rb") entry_format = "II" entry_size = struct.calcsize(entry_format) - stack = [] + caller = None + caller_stack = [] last_stamp = 0 delta = 0 while True: @@ -207,7 +204,7 @@ class Profile: exit = addr_exit & 0x00000001 if self.base_addr is None: - ref_addr = self.lookup_symbol('__debug_profile_reference2') + ref_addr = self.lookup_symbol('___debug_profile_reference2@0') if ref_addr: self.base_addr = (addr - ref_addr) & ~(options.align - 1) else: @@ -216,46 +213,80 @@ class Profile: rel_addr = addr - self.base_addr #print hex(addr - self.base_addr) - name = self.lookup_addr(rel_addr) + symbol, offset = self.lookup_addr(rel_addr) stamp = self.unwrap_stamp(stamp) - - delta += stamp - last_stamp + delta = stamp - last_stamp if not exit: if options.verbose >= 2: - print "%10u >> 0x%08x" % (stamp, addr) + sys.stderr.write("%08x >> 0x%08x\n" % (stamp, addr)) if options.verbose: - print "%10u >> %s" % (stamp, name) - delta -= caller_overhead - stack.append((name, stamp, delta)) - delta = 0 + sys.stderr.write("%+8u >> %s+%u\n" % (delta, symbol, offset)) else: if options.verbose >= 2: - print "%10u << 0x%08x" % (stamp, addr) - if len(stack): - self_time = delta - callee_overhead - entry_name, entry_stamp, delta = stack.pop() - if entry_name != name: - if options.verbose: - print "%10u << %s" % (stamp, name) - #assert entry_name == name - break - total_time = stamp - entry_stamp - self.functions[entry_name] = self.functions.get(entry_name, 0) + self_time - if options.verbose: - print "%10u << %s %+u" % (stamp, name, self_time) + sys.stderr.write("%08x << 0x%08x\n" % (stamp, addr)) + if options.verbose: + sys.stderr.write("%+8u << %s+%u\n" % (delta, symbol, offset)) + + # Eliminate outliers + if exit and delta > 0x1000000: + # TODO: Use a statistic test instead of a threshold + sys.stderr.write("warning: ignoring excessive delta of +%u in function %s\n" % (delta, symbol)) + delta = 0 + + # Remove overhead + # TODO: compute the overhead automatically + delta = max(0, delta - 84) + + if caller is not None: + caller[SAMPLES] += delta + + if not exit: + # Function call + try: + callee = profile.functions[symbol] + except KeyError: + name = demangle(symbol) + callee = Function(symbol, name) + profile.add_function(callee) + callee[CALLS] = 1 + callee[SAMPLES] = 0 else: - delta = 0 + callee[CALLS] += 1 + + if caller is not None: + try: + call = caller.calls[callee.id] + except KeyError: + call = Call(callee.id) + call[CALLS] = 1 + caller.add_call(call) + else: + call[CALLS] += 1 + caller_stack.append(caller) + + caller = callee + + else: + # Function return + if caller is not None: + assert caller.id == symbol + try: + caller = caller_stack.pop() + except IndexError: + caller = None last_stamp = stamp - def write_report(self): - total = sum(self.functions.values()) - results = self.functions.items() - results.sort(key = lambda (name, time): -time) - for name, time in results: - perc = float(time)/float(total)*100.0 - print "%6.03f %s" % (perc, name) + # compute derived data + profile.validate() + profile.find_cycles() + profile.aggregate(SAMPLES) + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(CALLS) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile def main(): @@ -274,6 +305,14 @@ def main(): '-b', '--base', metavar='FILE', type="string", dest="base", help="base addr") + parser.add_option( + '-n', '--node-thres', metavar='PERCENTAGE', + type="float", dest="node_thres", default=0.5, + help="eliminate nodes below this threshold [default: %default]") + parser.add_option( + '-e', '--edge-thres', metavar='PERCENTAGE', + type="float", dest="edge_thres", default=0.1, + help="eliminate edges below this threshold [default: %default]") parser.add_option( '-v', '--verbose', action="count", @@ -283,14 +322,18 @@ def main(): global options (options, args) = parser.parse_args(sys.argv[1:]) - profile = Profile() + reader = Reader() if options.base is not None: - profile.base_addr = int(options.base, 16) + reader.base_addr = int(options.base, 16) if options.map is not None: - profile.read_map(options.map) + reader.read_map(options.map) for arg in args: - profile.read_data(arg) - profile.write_report() + profile = reader.read_data(arg) + profile.prune(options.node_thres/100.0, options.edge_thres/100.0) + output = sys.stdout + dot = DotWriter(output) + colormap = TEMPERATURE_COLORMAP + dot.graph(profile, colormap) if __name__ == '__main__': -- cgit v1.2.3 From 883097053d1d3550cab92362a07f413e110520ae Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 22 Jul 2008 09:45:10 +0900 Subject: win32kprof: Store the profile data as an caller->callee hash table, instead of a trace. --- bin/win32kprof.py | 124 +++++------- src/gallium/auxiliary/util/p_debug_prof.c | 308 ++++++++++++++++++------------ 2 files changed, 233 insertions(+), 199 deletions(-) (limited to 'bin/win32kprof.py') diff --git a/bin/win32kprof.py b/bin/win32kprof.py index 4ffa4cc694..c36317d23a 100755 --- a/bin/win32kprof.py +++ b/bin/win32kprof.py @@ -117,15 +117,7 @@ class Reader: self.symbols = [] self.symbol_cache = {} self.base_addr = None - self.last_stamp = 0 - self.stamp_base = 0 - def unwrap_stamp(self, stamp): - if stamp < self.last_stamp: - self.stamp_base += 1 << 32 - self.last_stamp = stamp - return self.stamp_base + stamp - def read_map(self, mapfile): # See http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx last_addr = 0 @@ -140,10 +132,6 @@ class Reader: continue section, offset = section_offset.split(':') addr = int(offset, 16) - if last_addr == addr: - # TODO: handle collapsed functions - #assert last_name == name - continue self.symbols.append((addr, name)) last_addr = addr last_name = name @@ -170,12 +158,12 @@ class Reader: e = i continue if addr == end_addr: - return next_name + return next_name, addr - start_addr if addr > end_addr: s = i continue return name, addr - start_addr - return "0x%08x" % addr, 0 + raise ValueError def lookup_symbol(self, name): for symbol_addr, symbol_name in self.symbols: @@ -187,96 +175,76 @@ class Reader: profile = Profile() fp = file(data, "rb") - entry_format = "II" + entry_format = "IIII" entry_size = struct.calcsize(entry_format) caller = None caller_stack = [] - last_stamp = 0 - delta = 0 while True: entry = fp.read(entry_size) if len(entry) < entry_size: break - addr_exit, stamp = struct.unpack(entry_format, entry) - if addr_exit == 0 and stamp == 0: - break - addr = addr_exit & 0xfffffffe - exit = addr_exit & 0x00000001 + caller_addr, callee_addr, samples_lo, samples_hi = struct.unpack(entry_format, entry) + if caller_addr == 0 and callee_addr == 0: + continue if self.base_addr is None: - ref_addr = self.lookup_symbol('___debug_profile_reference2@0') + ref_addr = self.lookup_symbol('___debug_profile_reference@0') if ref_addr: - self.base_addr = (addr - ref_addr) & ~(options.align - 1) + self.base_addr = (caller_addr - ref_addr) & ~(options.align - 1) else: self.base_addr = 0 - #print hex(self.base_addr) - rel_addr = addr - self.base_addr - #print hex(addr - self.base_addr) - - symbol, offset = self.lookup_addr(rel_addr) - stamp = self.unwrap_stamp(stamp) - delta = stamp - last_stamp - - if not exit: - if options.verbose >= 2: - sys.stderr.write("%08x >> 0x%08x\n" % (stamp, addr)) - if options.verbose: - sys.stderr.write("%+8u >> %s+%u\n" % (delta, symbol, offset)) + sys.stderr.write('Base addr: %08x\n' % self.base_addr) + + samples = (samples_hi << 32) | samples_lo + + try: + caller_raddr = caller_addr - self.base_addr + caller_sym, caller_ofs = self.lookup_addr(caller_raddr) + + try: + caller = profile.functions[caller_sym] + except KeyError: + caller_name = demangle(caller_sym) + caller = Function(caller_sym, caller_name) + profile.add_function(caller) + caller[CALLS] = 0 + caller[SAMPLES] = 0 + except ValueError: + caller = None + + if not callee_addr: + if caller: + caller[SAMPLES] += samples else: - if options.verbose >= 2: - sys.stderr.write("%08x << 0x%08x\n" % (stamp, addr)) - if options.verbose: - sys.stderr.write("%+8u << %s+%u\n" % (delta, symbol, offset)) - - # Eliminate outliers - if exit and delta > 0x1000000: - # TODO: Use a statistic test instead of a threshold - sys.stderr.write("warning: ignoring excessive delta of +%u in function %s\n" % (delta, symbol)) - delta = 0 - - # Remove overhead - # TODO: compute the overhead automatically - delta = max(0, delta - 84) - - if caller is not None: - caller[SAMPLES] += delta - - if not exit: - # Function call + callee_raddr = callee_addr - self.base_addr + callee_sym, callee_ofs = self.lookup_addr(callee_raddr) + try: - callee = profile.functions[symbol] + callee = profile.functions[callee_sym] except KeyError: - name = demangle(symbol) - callee = Function(symbol, name) + callee_name = demangle(callee_sym) + callee = Function(callee_sym, callee_name) profile.add_function(callee) - callee[CALLS] = 1 + callee[CALLS] = samples callee[SAMPLES] = 0 else: - callee[CALLS] += 1 + callee[CALLS] += samples if caller is not None: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) - call[CALLS] = 1 + call[CALLS] = samples caller.add_call(call) else: - call[CALLS] += 1 - caller_stack.append(caller) - - caller = callee - - else: - # Function return - if caller is not None: - assert caller.id == symbol - try: - caller = caller_stack.pop() - except IndexError: - caller = None - - last_stamp = stamp + call[CALLS] += samples + + if options.verbose: + if not callee_addr: + sys.stderr.write('%s+%u: %u\n' % (caller_sym, caller_ofs, samples)) + else: + sys.stderr.write('%s+%u -> %s+%u: %u\n' % (caller_sym, caller_ofs, callee_sym, callee_ofs, samples)) # compute derived data profile.validate() diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/p_debug_prof.c index 69c914c780..5f9772ef91 100644 --- a/src/gallium/auxiliary/util/p_debug_prof.c +++ b/src/gallium/auxiliary/util/p_debug_prof.c @@ -46,97 +46,83 @@ #include "util/u_string.h" -#define PROFILE_FILE_SIZE 4*1024*1024 +#define PROFILE_TABLE_SIZE (1024*1024) #define FILE_NAME_SIZE 256 -static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.trace"; -static ULONG_PTR iFile = 0; -static BYTE *pMap = NULL; -static BYTE *pMapBegin = NULL; -static BYTE *pMapEnd = NULL; +struct debug_profile_entry +{ + uintptr_t caller; + uintptr_t callee; + uint64_t samples; +}; +static unsigned long enabled = 0; -void __declspec(naked) __cdecl -debug_profile_close(void) -{ - _asm { - push eax - push ebx - push ecx - push edx - push ebp - push edi - push esi - } +static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.prof"; +static ULONG_PTR iFile = 0; - if(iFile) { - EngUnmapFile(iFile); - /* Truncate the file */ - pMap = EngMapFile(wFileName, pMap - pMapBegin, &iFile); - if(pMap) - EngUnmapFile(iFile); - } - iFile = 0; - pMapBegin = pMapEnd = pMap = NULL; - - _asm { - pop esi - pop edi - pop ebp - pop edx - pop ecx - pop ebx - pop eax - ret - } -} +static struct debug_profile_entry *table = NULL; +static unsigned long free_table_entries = 0; +static unsigned long max_table_entries = 0; +uint64_t start_stamp = 0; +uint64_t end_stamp = 0; -void __declspec(naked) __cdecl -debug_profile_open(void) -{ - WCHAR *p; - - _asm { - push eax - push ebx - push ecx - push edx - push ebp - push edi - push esi - } - debug_profile_close(); +static void +debug_profile_entry(uintptr_t caller, uintptr_t callee, uint64_t samples) +{ + unsigned hash = ( caller + callee ) & PROFILE_TABLE_SIZE - 1; - // increment starting from the less significant digit - p = &wFileName[14]; while(1) { - if(*p == '9') { - *p-- = '0'; + if(table[hash].caller == 0 && table[hash].callee == 0) { + table[hash].caller = caller; + table[hash].callee = callee; + table[hash].samples = samples; + --free_table_entries; + break; } - else { - *p += 1; + else if(table[hash].caller == caller && table[hash].callee == callee) { + table[hash].samples += samples; break; } + else { + ++hash; + } } +} - pMap = EngMapFile(wFileName, PROFILE_FILE_SIZE, &iFile); - if(pMap) { - pMapBegin = pMap; - pMapEnd = pMap + PROFILE_FILE_SIZE; - } - - _asm { - pop esi - pop edi - pop ebp - pop edx - pop ecx - pop ebx - pop eax - ret - } + +static uintptr_t caller_stack[1024]; +static unsigned last_caller = 0; + + +static int64_t delta(void) { + int64_t result = end_stamp - start_stamp; + if(result > UINT64_C(0xffffffff)) + result = 0; + return result; +} + + +static void __cdecl +debug_profile_enter(uintptr_t callee) +{ + uintptr_t caller = last_caller ? caller_stack[last_caller - 1] : 0; + + if (caller) + debug_profile_entry(caller, 0, delta()); + debug_profile_entry(caller, callee, 1); + caller_stack[last_caller++] = callee; +} + + +static void __cdecl +debug_profile_exit(uintptr_t callee) +{ + debug_profile_entry(callee, 0, delta()); + if(last_caller) + --last_caller; } @@ -148,31 +134,49 @@ debug_profile_open(void) void __declspec(naked) __cdecl _penter(void) { _asm { - push ebx -retry: - mov ebx, [pMap] - test ebx, ebx - jz done - cmp ebx, [pMapEnd] - jne ready - call debug_profile_open - jmp retry -ready: push eax + mov eax, [enabled] + test eax, eax + jz skip + push edx - mov eax, [esp+12] - and eax, 0xfffffffe - mov [ebx], eax - add ebx, 4 + rdtsc - mov [ebx], eax - add ebx, 4 - mov [pMap], ebx + mov dword ptr [end_stamp], eax + mov dword ptr [end_stamp+4], edx + + xor eax, eax + mov [enabled], eax + + mov eax, [esp+8] + + push ebx + push ecx + push ebp + push edi + push esi + + push eax + call debug_profile_enter + add esp, 4 + + pop esi + pop edi + pop ebp + pop ecx + pop ebx + + mov eax, 1 + mov [enabled], eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + pop edx +skip: pop eax -done: - pop ebx - ret + ret } } @@ -185,30 +189,48 @@ done: void __declspec(naked) __cdecl _pexit(void) { _asm { - push ebx -retry: - mov ebx, [pMap] - test ebx, ebx - jz done - cmp ebx, [pMapEnd] - jne ready - call debug_profile_open - jmp retry -ready: push eax + mov eax, [enabled] + test eax, eax + jz skip + push edx - mov eax, [esp+12] - or eax, 0x00000001 - mov [ebx], eax - add ebx, 4 + rdtsc - mov [ebx], eax - add ebx, 4 - mov [pMap], ebx + mov dword ptr [end_stamp], eax + mov dword ptr [end_stamp+4], edx + + xor eax, eax + mov [enabled], eax + + mov eax, [esp+8] + + push ebx + push ecx + push ebp + push edi + push esi + + push eax + call debug_profile_exit + add esp, 4 + + pop esi + pop edi + pop ebp + pop ecx + pop ebx + + mov eax, 1 + mov [enabled], eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + pop edx +skip: pop eax -done: - pop ebx ret } } @@ -230,15 +252,53 @@ __debug_profile_reference(void) { void debug_profile_start(void) { - debug_profile_open(); - - if(pMap) { + WCHAR *p; + + // increment starting from the less significant digit + p = &wFileName[14]; + while(1) { + if(*p == '9') { + *p-- = '0'; + } + else { + *p += 1; + break; + } + } + + table = EngMapFile(wFileName, + PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry), + &iFile); + if(table) { unsigned i; + + free_table_entries = max_table_entries = PROFILE_TABLE_SIZE; + memset(table, 0, PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry)); + + table[0].caller = (uintptr_t)&__debug_profile_reference; + table[0].callee = 0; + table[0].samples = 0; + --free_table_entries; + + _asm { + push edx + push eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + + pop edx + pop eax + } + + last_caller = 0; + + enabled = 1; + for(i = 0; i < 8; ++i) { _asm { - call _penter call __debug_profile_reference - call _pexit } } } @@ -248,7 +308,13 @@ debug_profile_start(void) void debug_profile_stop(void) { - debug_profile_close(); + enabled = 0; + + if(iFile) + EngUnmapFile(iFile); + iFile = 0; + table = NULL; + free_table_entries = max_table_entries = 0; } #endif /* PROFILE */ -- cgit v1.2.3