summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBrian Paul <brian.paul@tungstengraphics.com>2008-10-15 20:46:43 -0600
committerBrian Paul <brian.paul@tungstengraphics.com>2008-10-15 20:56:55 -0600
commit0116ee1d1c341726b6ed23c2dddc4515e8a34385 (patch)
tree33307d93cbb261ebd4b707b53380b371b66937c3 /src
parent1c915b14a545ffb10cc1c98cc69f997b6471617f (diff)
cell: start some performance measurements
Use the spu_write_decrementer() and spu_read_decrementer() functions to measure time. Convert to milliseconds according to the system timebase value.
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/cell/common.h1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.c31
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c15
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c9
4 files changed, 55 insertions, 1 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index 0ff2c491fb..469d56cda8 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -299,6 +299,7 @@ struct cell_init_info
unsigned id;
unsigned num_spus;
unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */
+ float inv_timebase; /**< 1.0/timebase, for perf measurement */
/** Buffers for command batches, vertex/index data */
ubyte *buffers[CELL_NUM_BUFFERS];
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
index a6e268b362..28e5e6d706 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.c
+++ b/src/gallium/drivers/cell/ppu/cell_spu.c
@@ -53,6 +53,35 @@ struct cell_global_info cell_global;
/**
+ * Scan /proc/cpuinfo to determine the timebase for the system.
+ * This is used by the SPUs to convert 'decrementer' ticks to seconds.
+ * There may be a better way to get this value...
+ */
+static unsigned
+get_timebase(void)
+{
+ FILE *f = fopen("/proc/cpuinfo", "r");
+ unsigned timebase;
+
+ assert(f);
+ while (!feof(f)) {
+ char line[80];
+ fgets(line, sizeof(line), f);
+ if (strncmp(line, "timebase", 8) == 0) {
+ char *colon = strchr(line, ':');
+ if (colon) {
+ timebase = atoi(colon + 2);
+ break;
+ }
+ }
+ }
+ fclose(f);
+
+ return timebase;
+}
+
+
+/**
* Write a 1-word message to the given SPE mailbox.
*/
void
@@ -115,6 +144,7 @@ cell_start_spus(struct cell_context *cell)
{
static boolean one_time_init = FALSE;
uint i, j;
+ uint timebase = get_timebase();
if (one_time_init) {
fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
@@ -138,6 +168,7 @@ cell_start_spus(struct cell_context *cell)
cell_global.inits[i].id = i;
cell_global.inits[i].num_spus = cell->num_spus;
cell_global.inits[i].debug_flags = cell->debug_flags;
+ cell_global.inits[i].inv_timebase = 1000.0f / timebase;
for (j = 0; j < CELL_NUM_BUFFERS; j++) {
cell_global.inits[i].buffers[j] = cell->buffer[j];
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index d2c282a022..57d265fef7 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -670,6 +670,8 @@ cmd_batch(uint opcode)
}
+#define PERF 0
+
/**
* Main loop for SPEs: Get a command, execute it, repeat.
@@ -678,6 +680,7 @@ void
command_loop(void)
{
int exitFlag = 0;
+ uint t0, t1;
D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
@@ -686,10 +689,16 @@ command_loop(void)
D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
+ if (PERF)
+ spu_write_decrementer(~0);
+
/* read/wait from mailbox */
opcode = (unsigned int) spu_read_in_mbox();
D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
+ if (PERF)
+ t0 = spu_read_decrementer();
+
switch (opcode & CELL_CMD_OPCODE_MASK) {
case CELL_CMD_EXIT:
D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
@@ -707,6 +716,12 @@ command_loop(void)
printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
}
+ if (PERF) {
+ t1 = spu_read_decrementer();
+ printf("wait mbox time: %gms batch time: %gms\n",
+ (~0u - t0) * spu.init.inv_timebase,
+ (t0 - t1) * spu.init.inv_timebase);
+ }
}
D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 802455bf79..5515bb55c9 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -175,6 +175,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
const ubyte *vertices;
const ushort *indexes;
uint i, j;
+ uint num_tiles;
D_PRINTF(CELL_DEBUG_CMD,
"RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
@@ -242,6 +243,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
+ num_tiles = 0;
+
/**
** loop over tiles, rendering tris
**/
@@ -255,6 +258,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
if (!my_tile(tx, ty))
continue;
+ num_tiles++;
+
spu.cur_ctile_status = spu.ctile_status[ty][tx];
spu.cur_ztile_status = spu.ztile_status[ty][tx];
@@ -284,5 +289,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
spu.ztile_status[ty][tx] = spu.cur_ztile_status;
}
- D_PRINTF(CELL_DEBUG_CMD, "RENDER done\n");
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER done (%u tiles hit)\n",
+ num_tiles);
}