summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbin/win32kprof.py124
-rw-r--r--src/gallium/auxiliary/util/p_debug_prof.c308
2 files changed, 233 insertions, 199 deletions
diff --git a/bin/win32kprof.py b/bin/win32kprof.py
index 4ffa4cc694..c36317d23a 100755
--- a/bin/win32kprof.py
+++ b/bin/win32kprof.py
@@ -117,15 +117,7 @@ class Reader:
self.symbols = []
self.symbol_cache = {}
self.base_addr = None
- self.last_stamp = 0
- self.stamp_base = 0
- def unwrap_stamp(self, stamp):
- if stamp < self.last_stamp:
- self.stamp_base += 1 << 32
- self.last_stamp = stamp
- return self.stamp_base + stamp
-
def read_map(self, mapfile):
# See http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx
last_addr = 0
@@ -140,10 +132,6 @@ class Reader:
continue
section, offset = section_offset.split(':')
addr = int(offset, 16)
- if last_addr == addr:
- # TODO: handle collapsed functions
- #assert last_name == name
- continue
self.symbols.append((addr, name))
last_addr = addr
last_name = name
@@ -170,12 +158,12 @@ class Reader:
e = i
continue
if addr == end_addr:
- return next_name
+ return next_name, addr - start_addr
if addr > end_addr:
s = i
continue
return name, addr - start_addr
- return "0x%08x" % addr, 0
+ raise ValueError
def lookup_symbol(self, name):
for symbol_addr, symbol_name in self.symbols:
@@ -187,96 +175,76 @@ class Reader:
profile = Profile()
fp = file(data, "rb")
- entry_format = "II"
+ entry_format = "IIII"
entry_size = struct.calcsize(entry_format)
caller = None
caller_stack = []
- last_stamp = 0
- delta = 0
while True:
entry = fp.read(entry_size)
if len(entry) < entry_size:
break
- addr_exit, stamp = struct.unpack(entry_format, entry)
- if addr_exit == 0 and stamp == 0:
- break
- addr = addr_exit & 0xfffffffe
- exit = addr_exit & 0x00000001
+ caller_addr, callee_addr, samples_lo, samples_hi = struct.unpack(entry_format, entry)
+ if caller_addr == 0 and callee_addr == 0:
+ continue
if self.base_addr is None:
- ref_addr = self.lookup_symbol('___debug_profile_reference2@0')
+ ref_addr = self.lookup_symbol('___debug_profile_reference@0')
if ref_addr:
- self.base_addr = (addr - ref_addr) & ~(options.align - 1)
+ self.base_addr = (caller_addr - ref_addr) & ~(options.align - 1)
else:
self.base_addr = 0
- #print hex(self.base_addr)
- rel_addr = addr - self.base_addr
- #print hex(addr - self.base_addr)
-
- symbol, offset = self.lookup_addr(rel_addr)
- stamp = self.unwrap_stamp(stamp)
- delta = stamp - last_stamp
-
- if not exit:
- if options.verbose >= 2:
- sys.stderr.write("%08x >> 0x%08x\n" % (stamp, addr))
- if options.verbose:
- sys.stderr.write("%+8u >> %s+%u\n" % (delta, symbol, offset))
+ sys.stderr.write('Base addr: %08x\n' % self.base_addr)
+
+ samples = (samples_hi << 32) | samples_lo
+
+ try:
+ caller_raddr = caller_addr - self.base_addr
+ caller_sym, caller_ofs = self.lookup_addr(caller_raddr)
+
+ try:
+ caller = profile.functions[caller_sym]
+ except KeyError:
+ caller_name = demangle(caller_sym)
+ caller = Function(caller_sym, caller_name)
+ profile.add_function(caller)
+ caller[CALLS] = 0
+ caller[SAMPLES] = 0
+ except ValueError:
+ caller = None
+
+ if not callee_addr:
+ if caller:
+ caller[SAMPLES] += samples
else:
- if options.verbose >= 2:
- sys.stderr.write("%08x << 0x%08x\n" % (stamp, addr))
- if options.verbose:
- sys.stderr.write("%+8u << %s+%u\n" % (delta, symbol, offset))
-
- # Eliminate outliers
- if exit and delta > 0x1000000:
- # TODO: Use a statistic test instead of a threshold
- sys.stderr.write("warning: ignoring excessive delta of +%u in function %s\n" % (delta, symbol))
- delta = 0
-
- # Remove overhead
- # TODO: compute the overhead automatically
- delta = max(0, delta - 84)
-
- if caller is not None:
- caller[SAMPLES] += delta
-
- if not exit:
- # Function call
+ callee_raddr = callee_addr - self.base_addr
+ callee_sym, callee_ofs = self.lookup_addr(callee_raddr)
+
try:
- callee = profile.functions[symbol]
+ callee = profile.functions[callee_sym]
except KeyError:
- name = demangle(symbol)
- callee = Function(symbol, name)
+ callee_name = demangle(callee_sym)
+ callee = Function(callee_sym, callee_name)
profile.add_function(callee)
- callee[CALLS] = 1
+ callee[CALLS] = samples
callee[SAMPLES] = 0
else:
- callee[CALLS] += 1
+ callee[CALLS] += samples
if caller is not None:
try:
call = caller.calls[callee.id]
except KeyError:
call = Call(callee.id)
- call[CALLS] = 1
+ call[CALLS] = samples
caller.add_call(call)
else:
- call[CALLS] += 1
- caller_stack.append(caller)
-
- caller = callee
-
- else:
- # Function return
- if caller is not None:
- assert caller.id == symbol
- try:
- caller = caller_stack.pop()
- except IndexError:
- caller = None
-
- last_stamp = stamp
+ call[CALLS] += samples
+
+ if options.verbose:
+ if not callee_addr:
+ sys.stderr.write('%s+%u: %u\n' % (caller_sym, caller_ofs, samples))
+ else:
+ sys.stderr.write('%s+%u -> %s+%u: %u\n' % (caller_sym, caller_ofs, callee_sym, callee_ofs, samples))
# compute derived data
profile.validate()
diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/p_debug_prof.c
index 69c914c780..5f9772ef91 100644
--- a/src/gallium/auxiliary/util/p_debug_prof.c
+++ b/src/gallium/auxiliary/util/p_debug_prof.c
@@ -46,97 +46,83 @@
#include "util/u_string.h"
-#define PROFILE_FILE_SIZE 4*1024*1024
+#define PROFILE_TABLE_SIZE (1024*1024)
#define FILE_NAME_SIZE 256
-static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.trace";
-static ULONG_PTR iFile = 0;
-static BYTE *pMap = NULL;
-static BYTE *pMapBegin = NULL;
-static BYTE *pMapEnd = NULL;
+struct debug_profile_entry
+{
+ uintptr_t caller;
+ uintptr_t callee;
+ uint64_t samples;
+};
+static unsigned long enabled = 0;
-void __declspec(naked) __cdecl
-debug_profile_close(void)
-{
- _asm {
- push eax
- push ebx
- push ecx
- push edx
- push ebp
- push edi
- push esi
- }
+static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.prof";
+static ULONG_PTR iFile = 0;
- if(iFile) {
- EngUnmapFile(iFile);
- /* Truncate the file */
- pMap = EngMapFile(wFileName, pMap - pMapBegin, &iFile);
- if(pMap)
- EngUnmapFile(iFile);
- }
- iFile = 0;
- pMapBegin = pMapEnd = pMap = NULL;
-
- _asm {
- pop esi
- pop edi
- pop ebp
- pop edx
- pop ecx
- pop ebx
- pop eax
- ret
- }
-}
+static struct debug_profile_entry *table = NULL;
+static unsigned long free_table_entries = 0;
+static unsigned long max_table_entries = 0;
+uint64_t start_stamp = 0;
+uint64_t end_stamp = 0;
-void __declspec(naked) __cdecl
-debug_profile_open(void)
-{
- WCHAR *p;
-
- _asm {
- push eax
- push ebx
- push ecx
- push edx
- push ebp
- push edi
- push esi
- }
- debug_profile_close();
+static void
+debug_profile_entry(uintptr_t caller, uintptr_t callee, uint64_t samples)
+{
+ unsigned hash = ( caller + callee ) & PROFILE_TABLE_SIZE - 1;
- // increment starting from the less significant digit
- p = &wFileName[14];
while(1) {
- if(*p == '9') {
- *p-- = '0';
+ if(table[hash].caller == 0 && table[hash].callee == 0) {
+ table[hash].caller = caller;
+ table[hash].callee = callee;
+ table[hash].samples = samples;
+ --free_table_entries;
+ break;
}
- else {
- *p += 1;
+ else if(table[hash].caller == caller && table[hash].callee == callee) {
+ table[hash].samples += samples;
break;
}
+ else {
+ ++hash;
+ }
}
+}
- pMap = EngMapFile(wFileName, PROFILE_FILE_SIZE, &iFile);
- if(pMap) {
- pMapBegin = pMap;
- pMapEnd = pMap + PROFILE_FILE_SIZE;
- }
-
- _asm {
- pop esi
- pop edi
- pop ebp
- pop edx
- pop ecx
- pop ebx
- pop eax
- ret
- }
+
+static uintptr_t caller_stack[1024];
+static unsigned last_caller = 0;
+
+
+static int64_t delta(void) {
+ int64_t result = end_stamp - start_stamp;
+ if(result > UINT64_C(0xffffffff))
+ result = 0;
+ return result;
+}
+
+
+static void __cdecl
+debug_profile_enter(uintptr_t callee)
+{
+ uintptr_t caller = last_caller ? caller_stack[last_caller - 1] : 0;
+
+ if (caller)
+ debug_profile_entry(caller, 0, delta());
+ debug_profile_entry(caller, callee, 1);
+ caller_stack[last_caller++] = callee;
+}
+
+
+static void __cdecl
+debug_profile_exit(uintptr_t callee)
+{
+ debug_profile_entry(callee, 0, delta());
+ if(last_caller)
+ --last_caller;
}
@@ -148,31 +134,49 @@ debug_profile_open(void)
void __declspec(naked) __cdecl
_penter(void) {
_asm {
- push ebx
-retry:
- mov ebx, [pMap]
- test ebx, ebx
- jz done
- cmp ebx, [pMapEnd]
- jne ready
- call debug_profile_open
- jmp retry
-ready:
push eax
+ mov eax, [enabled]
+ test eax, eax
+ jz skip
+
push edx
- mov eax, [esp+12]
- and eax, 0xfffffffe
- mov [ebx], eax
- add ebx, 4
+
rdtsc
- mov [ebx], eax
- add ebx, 4
- mov [pMap], ebx
+ mov dword ptr [end_stamp], eax
+ mov dword ptr [end_stamp+4], edx
+
+ xor eax, eax
+ mov [enabled], eax
+
+ mov eax, [esp+8]
+
+ push ebx
+ push ecx
+ push ebp
+ push edi
+ push esi
+
+ push eax
+ call debug_profile_enter
+ add esp, 4
+
+ pop esi
+ pop edi
+ pop ebp
+ pop ecx
+ pop ebx
+
+ mov eax, 1
+ mov [enabled], eax
+
+ rdtsc
+ mov dword ptr [start_stamp], eax
+ mov dword ptr [start_stamp+4], edx
+
pop edx
+skip:
pop eax
-done:
- pop ebx
- ret
+ ret
}
}
@@ -185,30 +189,48 @@ done:
void __declspec(naked) __cdecl
_pexit(void) {
_asm {
- push ebx
-retry:
- mov ebx, [pMap]
- test ebx, ebx
- jz done
- cmp ebx, [pMapEnd]
- jne ready
- call debug_profile_open
- jmp retry
-ready:
push eax
+ mov eax, [enabled]
+ test eax, eax
+ jz skip
+
push edx
- mov eax, [esp+12]
- or eax, 0x00000001
- mov [ebx], eax
- add ebx, 4
+
rdtsc
- mov [ebx], eax
- add ebx, 4
- mov [pMap], ebx
+ mov dword ptr [end_stamp], eax
+ mov dword ptr [end_stamp+4], edx
+
+ xor eax, eax
+ mov [enabled], eax
+
+ mov eax, [esp+8]
+
+ push ebx
+ push ecx
+ push ebp
+ push edi
+ push esi
+
+ push eax
+ call debug_profile_exit
+ add esp, 4
+
+ pop esi
+ pop edi
+ pop ebp
+ pop ecx
+ pop ebx
+
+ mov eax, 1
+ mov [enabled], eax
+
+ rdtsc
+ mov dword ptr [start_stamp], eax
+ mov dword ptr [start_stamp+4], edx
+
pop edx
+skip:
pop eax
-done:
- pop ebx
ret
}
}
@@ -230,15 +252,53 @@ __debug_profile_reference(void) {
void
debug_profile_start(void)
{
- debug_profile_open();
-
- if(pMap) {
+ WCHAR *p;
+
+ // increment starting from the less significant digit
+ p = &wFileName[14];
+ while(1) {
+ if(*p == '9') {
+ *p-- = '0';
+ }
+ else {
+ *p += 1;
+ break;
+ }
+ }
+
+ table = EngMapFile(wFileName,
+ PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry),
+ &iFile);
+ if(table) {
unsigned i;
+
+ free_table_entries = max_table_entries = PROFILE_TABLE_SIZE;
+ memset(table, 0, PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry));
+
+ table[0].caller = (uintptr_t)&__debug_profile_reference;
+ table[0].callee = 0;
+ table[0].samples = 0;
+ --free_table_entries;
+
+ _asm {
+ push edx
+ push eax
+
+ rdtsc
+ mov dword ptr [start_stamp], eax
+ mov dword ptr [start_stamp+4], edx
+
+ pop edx
+ pop eax
+ }
+
+ last_caller = 0;
+
+ enabled = 1;
+
for(i = 0; i < 8; ++i) {
_asm {
- call _penter
call __debug_profile_reference
- call _pexit
}
}
}
@@ -248,7 +308,13 @@ debug_profile_start(void)
void
debug_profile_stop(void)
{
- debug_profile_close();
+ enabled = 0;
+
+ if(iFile)
+ EngUnmapFile(iFile);
+ iFile = 0;
+ table = NULL;
+ free_table_entries = max_table_entries = 0;
}
#endif /* PROFILE */