diff options
Diffstat (limited to 'progs/perf')
-rw-r--r-- | progs/perf/Makefile | 8 | ||||
-rw-r--r-- | progs/perf/SConscript | 6 | ||||
-rw-r--r-- | progs/perf/common.c | 18 | ||||
-rw-r--r-- | progs/perf/common.h | 5 | ||||
-rw-r--r-- | progs/perf/copytex.c | 214 | ||||
-rw-r--r-- | progs/perf/drawoverhead.c | 17 | ||||
-rw-r--r-- | progs/perf/fbobind.c | 153 | ||||
-rw-r--r-- | progs/perf/fill.c | 248 | ||||
-rw-r--r-- | progs/perf/genmipmap.c | 136 | ||||
-rw-r--r-- | progs/perf/glmain.c | 142 | ||||
-rw-r--r-- | progs/perf/glmain.h | 15 | ||||
-rw-r--r-- | progs/perf/readpixels.c | 169 | ||||
-rw-r--r-- | progs/perf/swapbuffers.c | 161 | ||||
-rw-r--r-- | progs/perf/teximage.c | 182 | ||||
-rw-r--r-- | progs/perf/vbo.c | 166 | ||||
-rw-r--r-- | progs/perf/vertexrate.c | 19 |
16 files changed, 1568 insertions, 91 deletions
diff --git a/progs/perf/Makefile b/progs/perf/Makefile index 219667439f..f7b965542e 100644 --- a/progs/perf/Makefile +++ b/progs/perf/Makefile @@ -10,11 +10,17 @@ LIBS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLEW_LIB) \ # using : to avoid APP_CC pointing to CC loop CC := $(APP_CC) -CFLAGS += -I$(INCDIR) +CFLAGS := -I$(INCDIR) $(CFLAGS) LDLIBS = $(LIBS) PROG_SOURCES = \ + copytex.c \ drawoverhead.c \ + fbobind.c \ + fill.c \ + genmipmap.c \ + readpixels.c \ + swapbuffers.c \ teximage.c \ vbo.c \ vertexrate.c \ diff --git a/progs/perf/SConscript b/progs/perf/SConscript index c019dc95b0..a5ec9a7c2a 100644 --- a/progs/perf/SConscript +++ b/progs/perf/SConscript @@ -8,7 +8,13 @@ env = env.Clone() env.Prepend(LIBS = ['$GLUT_LIB']) progs = [ + 'copytex', 'drawoverhead', + 'fbobind', + 'fill', + 'genmipmap', + 'readpixels', + 'swapbuffers', 'teximage', 'vbo', 'vertexrate', diff --git a/progs/perf/common.c b/progs/perf/common.c index 695b8a99d9..722f4b7b45 100644 --- a/progs/perf/common.c +++ b/progs/perf/common.c @@ -113,3 +113,21 @@ PerfMeasureRate(PerfRateFunc f) } +/* Note static buffer, can only use once per printf. 
+ */ +const char * +PerfHumanFloat( double d ) +{ + static char buf[80]; + + if (d > 1000000000.0) + snprintf(buf, sizeof(buf), "%.1f billion", d / 1000000000.0); + else if (d > 1000000.0) + snprintf(buf, sizeof(buf), "%.1f million", d / 1000000.0); + else if (d > 1000.0) + snprintf(buf, sizeof(buf), "%.1f thousand", d / 1000.0); + else + snprintf(buf, sizeof(buf), "%.1f", d); + + return buf; +} diff --git a/progs/perf/common.h b/progs/perf/common.h index 85db678c64..6ea17402b5 100644 --- a/progs/perf/common.h +++ b/progs/perf/common.h @@ -24,12 +24,17 @@ #define COMMON_H +#include <stddef.h> /* for offsetof() */ + + typedef void (*PerfRateFunc)(unsigned count); extern double PerfMeasureRate(PerfRateFunc f); +const char * +PerfHumanFloat( double d ); extern void perf_printf(const char *format, ...); diff --git a/progs/perf/copytex.c b/progs/perf/copytex.c new file mode 100644 index 0000000000..f7a6b8aec3 --- /dev/null +++ b/progs/perf/copytex.c @@ -0,0 +1,214 @@ +/* + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Measure glCopyTex[Sub]Image() rate. + * Create a large, off-screen framebuffer object for rendering and + * copying the texture data from it since we can't make really large + * on-screen windows. + * + * Brian Paul + * 22 Sep 2009 + */ + +#include <string.h> +#include "glmain.h" +#include "common.h" + +int WinWidth = 100, WinHeight = 100; + +static GLuint VBO, FBO, RBO, Tex; + +const GLsizei MinSize = 16, MaxSize = 4096; +static GLsizei TexSize; + +static const GLboolean DrawPoint = GL_TRUE; +static const GLboolean TexSubImage4 = GL_FALSE; + +struct vertex +{ + GLfloat x, y, s, t; +}; + +static const struct vertex vertices[1] = { + { 0.0, 0.0, 0.5, 0.5 }, +}; + +#define VOFFSET(F) ((void *) offsetof(struct vertex, F)) + + +/** Called from test harness/main */ +void +PerfInit(void) +{ + const GLenum filter = GL_LINEAR; + GLenum stat; + + if (!PerfExtensionSupported("GL_EXT_framebuffer_object")) { + perf_printf("copytex: GL_EXT_framebuffer_object not supported\n"); + exit(0); + } + + /* setup VBO */ + glGenBuffersARB(1, &VBO); + glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(vertices), + vertices, GL_STATIC_DRAW_ARB); + + glVertexPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(x)); + glTexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(s)); + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + + /* setup texture */ + glGenTextures(1, &Tex); + glBindTexture(GL_TEXTURE_2D, Tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); + glEnable(GL_TEXTURE_2D); + + /* setup rbo */ + glGenRenderbuffersEXT(1, &RBO); + 
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, RBO); + glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGBA, MaxSize, MaxSize); + + /* setup fbo */ + glGenFramebuffersEXT(1, &FBO); + glBindFramebufferEXT(GL_FRAMEBUFFER, FBO); + glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, + GL_COLOR_ATTACHMENT0_EXT, + GL_RENDERBUFFER_EXT, RBO); + + stat = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT); + if (stat != GL_FRAMEBUFFER_COMPLETE_EXT) { + perf_printf("fboswitch: Error: incomplete FBO!\n"); + exit(1); + } + + /* clear the FBO */ + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glViewport(0, 0, MaxSize, MaxSize); + glClear(GL_COLOR_BUFFER_BIT); +} + + +static void +CopyTexImage(unsigned count) +{ + unsigned i; + for (i = 1; i < count; i++) { + /* draw something */ + if (DrawPoint) + glDrawArrays(GL_POINTS, 0, 1); + + /* copy whole texture */ + glCopyTexImage2D(GL_TEXTURE_2D, 0, + GL_RGBA, 0, 0, TexSize, TexSize, 0); + } + glFinish(); +} + + +static void +CopyTexSubImage(unsigned count) +{ + unsigned i; + for (i = 1; i < count; i++) { + /* draw something */ + if (DrawPoint) + glDrawArrays(GL_POINTS, 0, 1); + + /* copy sub texture */ + if (TexSubImage4) { + /* four sub-copies */ + GLsizei half = TexSize / 2; + /* lower-left */ + glCopyTexSubImage2D(GL_TEXTURE_2D, 0, + 0, 0, 0, 0, half, half); + /* lower-right */ + glCopyTexSubImage2D(GL_TEXTURE_2D, 0, + half, 0, half, 0, half, half); + /* upper-left */ + glCopyTexSubImage2D(GL_TEXTURE_2D, 0, + 0, half, 0, half, half, half); + /* upper-right */ + glCopyTexSubImage2D(GL_TEXTURE_2D, 0, + half, half, half, half, half, half); + } + else { + /* one big copy */ + glCopyTexSubImage2D(GL_TEXTURE_2D, 0, + 0, 0, 0, 0, TexSize, TexSize); + } + } + glFinish(); +} + + +/** Called from test harness/main */ +void +PerfNextRound(void) +{ +} + + +/** Called from test harness/main */ +void +PerfDraw(void) +{ + double rate, mbPerSec; + GLint sub, maxTexSize; + + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTexSize); + + /* loop over whole/sub tex copy */ 
+ for (sub = 0; sub < 2; sub++) { + + /* loop over texture sizes */ + for (TexSize = MinSize; TexSize <= MaxSize; TexSize *= 4) { + + if (TexSize <= maxTexSize) { + GLint bytesPerImage = 4 * TexSize * TexSize; + + if (sub == 0) + rate = PerfMeasureRate(CopyTexImage); + else { + /* setup empty dest texture */ + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, + TexSize, TexSize, 0, + GL_RGBA, GL_UNSIGNED_BYTE, NULL); + rate = PerfMeasureRate(CopyTexSubImage); + } + + mbPerSec = rate * bytesPerImage / (1024.0 * 1024.0); + } + else { + rate = 0.0; + mbPerSec = 0.0; + } + + perf_printf(" glCopyTex%sImage(%d x %d): %.1f copies/sec, %.1f Mpixels/sec\n", + (sub ? "Sub" : ""), TexSize, TexSize, rate, mbPerSec); + } + } + + exit(0); +} diff --git a/progs/perf/drawoverhead.c b/progs/perf/drawoverhead.c index c1e166e746..f75c9bb74e 100644 --- a/progs/perf/drawoverhead.c +++ b/progs/perf/drawoverhead.c @@ -27,7 +27,6 @@ * easily portable to other APIs. * * All the window-system stuff should be contained in glmain.c (or TBDmain.c). - * All the re-usable, generic code should be in common.c (XXX not done yet). 
* * Brian Paul * 15 Sep 2009 @@ -108,6 +107,10 @@ DrawStateChange(unsigned count) glFinish(); } +void +PerfNextRound(void) +{ +} /** Called from test harness/main */ void @@ -116,18 +119,18 @@ PerfDraw(void) double rate0, rate1, rate2, overhead; rate0 = PerfMeasureRate(DrawNoStateChange); - perf_printf(" Draw only: %.1f draws/second\n", rate0); + perf_printf(" Draw only: %s draws/second\n", + PerfHumanFloat(rate0)); - rate1 = PerfMeasureRate(DrawNopStateChange); overhead = 1000.0 * (1.0 / rate1 - 1.0 / rate0); - perf_printf(" Draw w/ nop state change: %.1f draws/sec (overhead: %f ms/draw)\n", - rate1, overhead); + perf_printf(" Draw w/ nop state change: %s draws/sec (overhead: %f ms/draw)\n", + PerfHumanFloat(rate1), overhead); rate2 = PerfMeasureRate(DrawStateChange); overhead = 1000.0 * (1.0 / rate2 - 1.0 / rate0); - perf_printf(" Draw w/ state change: %.1f draws/sec (overhead: %f ms/draw)\n", - rate2, overhead); + perf_printf(" Draw w/ state change: %s draws/sec (overhead: %f ms/draw)\n", + PerfHumanFloat(rate2), overhead); exit(0); } diff --git a/progs/perf/fbobind.c b/progs/perf/fbobind.c new file mode 100644 index 0000000000..fb52a93a2f --- /dev/null +++ b/progs/perf/fbobind.c @@ -0,0 +1,153 @@ +/* + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Measure rate of binding/switching between FBO targets. + * Create two framebuffer objects for rendering to two textures. + * Ping pong between texturing from one and drawing into the other. + * + * Brian Paul + * 22 Sep 2009 + */ + +#include <string.h> +#include "glmain.h" +#include "common.h" + +int WinWidth = 100, WinHeight = 100; + +static GLuint VBO; + +static GLuint FBO[2], Tex[2]; + +static const GLsizei TexSize = 512; + +static const GLboolean DrawPoint = GL_TRUE; + +struct vertex +{ + GLfloat x, y, s, t; +}; + +static const struct vertex vertices[1] = { + { 0.0, 0.0, 0.5, 0.5 }, +}; + +#define VOFFSET(F) ((void *) offsetof(struct vertex, F)) + + +/** Called from test harness/main */ +void +PerfInit(void) +{ + const GLenum filter = GL_LINEAR; + GLenum stat; + int i; + + if (!PerfExtensionSupported("GL_EXT_framebuffer_object")) { + perf_printf("fboswitch: GL_EXT_framebuffer_object not supported\n"); + exit(0); + } + + /* setup VBO */ + glGenBuffersARB(1, &VBO); + glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(vertices), + vertices, GL_STATIC_DRAW_ARB); + + glVertexPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(x)); + glTexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(s)); + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + + glGenFramebuffersEXT(2, FBO); + glGenTextures(2, Tex); + + for (i = 0; i < 2; i++) { + /* setup texture */ + glBindTexture(GL_TEXTURE_2D, Tex[i]); + 
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, + TexSize, TexSize, 0, + GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); + + + /* setup fbo */ + glBindFramebufferEXT(GL_FRAMEBUFFER, FBO[i]); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, + GL_COLOR_ATTACHMENT0_EXT, + GL_TEXTURE_2D, Tex[i], 0/*level*/); + stat = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT); + if (stat != GL_FRAMEBUFFER_COMPLETE_EXT) { + perf_printf("fboswitch: Error: incomplete FBO!\n"); + exit(1); + } + + /* clear the FBO */ + glClear(GL_COLOR_BUFFER_BIT); + } + + glEnable(GL_TEXTURE_2D); +} + + +static void +FBOBind(unsigned count) +{ + unsigned i; + for (i = 1; i < count; i++) { + const GLuint dst = i & 1; + const GLuint src = 1 - dst; + + /* bind src texture */ + glBindTexture(GL_TEXTURE_2D, Tex[src]); + + /* bind dst fbo */ + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, FBO[dst]); + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + + /* draw something */ + if (DrawPoint) + glDrawArrays(GL_POINTS, 0, 1); + } + glFinish(); +} + + +/** Called from test harness/main */ +void +PerfNextRound(void) +{ +} + + +/** Called from test harness/main */ +void +PerfDraw(void) +{ + double rate; + + rate = PerfMeasureRate(FBOBind); + perf_printf(" FBO Binding: %1.f binds/sec\n", rate); + + exit(0); +} diff --git a/progs/perf/fill.c b/progs/perf/fill.c new file mode 100644 index 0000000000..279f2b5f18 --- /dev/null +++ b/progs/perf/fill.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Measure fill rates. 
+ * + * Brian Paul + * 21 Sep 2009 + */ + +#include "glmain.h" +#include "common.h" + + +int WinWidth = 1000, WinHeight = 1000; + +static GLuint VBO, TexObj; + + +struct vertex +{ + GLfloat x, y, s, t, r, g, b, a; +}; + +#define VOFFSET(F) ((void *) offsetof(struct vertex, F)) + +static const struct vertex vertices[4] = { + /* x y s t r g b a */ + { -1.0, -1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.5 }, + { 1.0, -1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.5 }, + { 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.5 }, + { -1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.5 } +}; + + +static const char *VertexShader = + "void main() \n" + "{ \n" + " gl_Position = ftransform(); \n" + " gl_TexCoord[0] = gl_MultiTexCoord0; \n" + " gl_FrontColor = gl_Color; \n" + "} \n"; + +/* simple fragment shader */ +static const char *FragmentShader1 = + "uniform sampler2D Tex; \n" + "void main() \n" + "{ \n" + " vec4 t = texture2D(Tex, gl_TexCoord[0].xy); \n" + " gl_FragColor = vec4(1.0) - t * gl_Color; \n" + "} \n"; + +/** + * A more complex fragment shader (but equivalent to first shader). + * A good optimizer should catch some of these no-op operations, but + * probably not all of them. 
+ */ +static const char *FragmentShader2 = + "uniform sampler2D Tex; \n" + "void main() \n" + "{ \n" + " // as above \n" + " vec4 t = texture2D(Tex, gl_TexCoord[0].xy); \n" + " t = vec4(1.0) - t * gl_Color; \n" + + " vec4 u; \n" + + " // no-op negate/swizzle \n" + " u = -t.wzyx; \n" + " t = -u.wzyx; \n" + + " // no-op inverts \n" + " t = vec4(1.0) - t; \n" + " t = vec4(1.0) - t; \n" + + " // no-op min/max \n" + " t = min(t, t); \n" + " t = max(t, t); \n" + + " // no-op moves \n" + " u = t; \n" + " t = u; \n" + " u = t; \n" + " t = u; \n" + + " // no-op add/mul \n" + " t = (t + t + t + t) * 0.25; \n" + + " // no-op mul/sub \n" + " t = 3.0 * t - 2.0 * t; \n" + + " // no-op negate/min/max \n" + " t = -min(-t, -t); \n" + " t = -max(-t, -t); \n" + + " gl_FragColor = t; \n" + "} \n"; + +static GLuint ShaderProg1, ShaderProg2; + + + +/** Called from test harness/main */ +void +PerfInit(void) +{ + GLint u; + + /* setup VBO w/ vertex data */ + glGenBuffersARB(1, &VBO); + glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, + sizeof(vertices), vertices, GL_STATIC_DRAW_ARB); + glVertexPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(x)); + glTexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(s)); + glColorPointer(4, GL_FLOAT, sizeof(struct vertex), VOFFSET(r)); + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_COLOR_ARRAY); + + /* setup texture */ + TexObj = PerfCheckerTexture(128, 128); + + /* setup shaders */ + ShaderProg1 = PerfShaderProgram(VertexShader, FragmentShader1); + glUseProgram(ShaderProg1); + u = glGetUniformLocation(ShaderProg1, "Tex"); + glUniform1i(u, 0); /* texture unit 0 */ + + ShaderProg2 = PerfShaderProgram(VertexShader, FragmentShader2); + glUseProgram(ShaderProg2); + u = glGetUniformLocation(ShaderProg2, "Tex"); + glUniform1i(u, 0); /* texture unit 0 */ + + glUseProgram(0); +} + + +static void +Ortho(void) +{ + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 
1.0); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); +} + + + +static void +DrawQuad(unsigned count) +{ + unsigned i; + glClear(GL_COLOR_BUFFER_BIT); + + for (i = 0; i < count; i++) { + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + + /* Avoid sending command buffers with huge numbers of fullscreen + * quads. Graphics schedulers don't always cope well with + * this... + */ + if (i % 128 == 0) { + PerfSwapBuffers(); + glClear(GL_COLOR_BUFFER_BIT); + } + } + + glFinish(); + + if (1) + PerfSwapBuffers(); +} + +void +PerfNextRound(void) +{ +} + +/** Called from test harness/main */ +void +PerfDraw(void) +{ + double rate; + double pixelsPerDraw = WinWidth * WinHeight; + + Ortho(); + + /* simple fill */ + rate = PerfMeasureRate(DrawQuad) * pixelsPerDraw; + perf_printf(" Simple fill: %s pixels/second\n", + PerfHumanFloat(rate)); + + /* blended fill */ + glEnable(GL_BLEND); + rate = PerfMeasureRate(DrawQuad) * pixelsPerDraw; + glDisable(GL_BLEND); + perf_printf(" Blended fill: %s pixels/second\n", + PerfHumanFloat(rate)); + + /* textured fill */ + glEnable(GL_TEXTURE_2D); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + rate = PerfMeasureRate(DrawQuad) * pixelsPerDraw; + glDisable(GL_TEXTURE_2D); + glDisableClientState(GL_TEXTURE_COORD_ARRAY); + perf_printf(" Textured fill: %s pixels/second\n", + PerfHumanFloat(rate)); + + /* shader1 fill */ + glUseProgram(ShaderProg1); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + rate = PerfMeasureRate(DrawQuad) * pixelsPerDraw; + glUseProgram(0); + glDisableClientState(GL_TEXTURE_COORD_ARRAY); + perf_printf(" Shader1 fill: %s pixels/second\n", + PerfHumanFloat(rate)); + + /* shader2 fill */ + glUseProgram(ShaderProg2); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + rate = PerfMeasureRate(DrawQuad) * pixelsPerDraw; + glUseProgram(0); + glDisableClientState(GL_TEXTURE_COORD_ARRAY); + perf_printf(" Shader2 fill: %s pixels/second\n", + PerfHumanFloat(rate)); + + exit(0); +} + diff --git a/progs/perf/genmipmap.c b/progs/perf/genmipmap.c new 
file mode 100644 index 0000000000..4b7d6ad155 --- /dev/null +++ b/progs/perf/genmipmap.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Measure glGenerateMipmap() speed. 
+ * + * Brian Paul + * 24 Sep 2009 + */ + +#include <string.h> +#include "glmain.h" +#include "common.h" + + +int WinWidth = 100, WinHeight = 100; + +static GLboolean DrawPoint = GL_TRUE; +static GLuint VBO; +static GLuint TexObj = 0; +static GLint BaseLevel, MaxLevel; + +struct vertex +{ + GLfloat x, y, s, t; +}; + +static const struct vertex vertices[1] = { + { 0.0, 0.0, 0.5, 0.5 }, +}; + +#define VOFFSET(F) ((void *) offsetof(struct vertex, F)) + +/** Called from test harness/main */ +void +PerfInit(void) +{ + /* setup VBO w/ vertex data */ + glGenBuffersARB(1, &VBO); + glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, + sizeof(vertices), vertices, GL_STATIC_DRAW_ARB); + glVertexPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(x)); + glTexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(s)); + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + + glGenTextures(1, &TexObj); + glBindTexture(GL_TEXTURE_2D, TexObj); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glEnable(GL_TEXTURE_2D); +} + + +static void +GenMipmap(unsigned count) +{ + unsigned i; + for (i = 0; i < count; i++) { + GLubyte texel[4]; + texel[0] = texel[1] = texel[2] = texel[3] = i & 0xff; + /* dirty the base image */ + glTexSubImage2D(GL_TEXTURE_2D, BaseLevel, + 0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, texel); + glGenerateMipmap(GL_TEXTURE_2D); + if (DrawPoint) + glDrawArrays(GL_POINTS, 0, 1); + } + glFinish(); +} + + +/** Called from test harness/main */ +void +PerfNextRound(void) +{ +} + + +/** Called from test harness/main */ +void +PerfDraw(void) +{ + const GLint NumLevels = 12; + const GLint TexWidth = 2048, TexHeight = 2048; + GLubyte *img; + double rate; + + /* Make 2K x 2K texture */ + img = (GLubyte *) malloc(TexWidth * TexHeight * 4); + memset(img, 128, TexWidth * TexHeight * 4); + glTexImage2D(GL_TEXTURE_2D, 0, + GL_RGBA, 
TexWidth, TexHeight, 0, + GL_RGBA, GL_UNSIGNED_BYTE, img); + free(img); + + perf_printf("Texture level[0] size: %d x %d, %d levels\n", + TexWidth, TexHeight, NumLevels); + + /* loop over base levels 0, 2, 4 */ + for (BaseLevel = 0; BaseLevel <= 4; BaseLevel += 2) { + + /* loop over max level */ + for (MaxLevel = NumLevels; MaxLevel > BaseLevel; MaxLevel--) { + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, BaseLevel); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, MaxLevel); + + rate = PerfMeasureRate(GenMipmap); + + perf_printf(" glGenerateMipmap(levels %d..%d): %.2f gens/sec\n", + BaseLevel + 1, MaxLevel, rate); + } + } + + exit(0); +} diff --git a/progs/perf/glmain.c b/progs/perf/glmain.c index 62d14259f8..69cdbce319 100644 --- a/progs/perf/glmain.c +++ b/progs/perf/glmain.c @@ -26,13 +26,13 @@ */ +#include <stdio.h> #include "glmain.h" #include <GL/glut.h> static int Win; static GLfloat Xrot = 0, Yrot = 0, Zrot = 0; -static GLboolean Anim = GL_FALSE; /** Return time in seconds */ @@ -50,13 +50,133 @@ PerfSwapBuffers(void) } +/** make simple checkerboard texture object */ +GLuint +PerfCheckerTexture(GLsizei width, GLsizei height) +{ + const GLenum filter = GL_NEAREST; + GLubyte *img = (GLubyte *) malloc(width * height * 4); + GLint i, j, k; + GLuint obj; + + k = 0; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + GLubyte color; + if (((i / 8) ^ (j / 8)) & 1) { + color = 0xff; + } + else { + color = 0x0; + } + img[k++] = color; + img[k++] = color; + img[k++] = color; + img[k++] = color; + } + } + + glGenTextures(1, &obj); + glBindTexture(GL_TEXTURE_2D, obj); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, + GL_RGBA, GL_UNSIGNED_BYTE, img); + free(img); + + return obj; +} + + +static GLuint +CompileShader(GLenum type, const char *shader) +{ + GLuint sh; + GLint stat; + + sh = 
glCreateShader(type); + glShaderSource(sh, 1, (const GLchar **) &shader, NULL); + + glCompileShader(sh); + + glGetShaderiv(sh, GL_COMPILE_STATUS, &stat); + if (!stat) { + GLchar log[1000]; + GLsizei len; + glGetShaderInfoLog(sh, 1000, &len, log); + fprintf(stderr, "Error: problem compiling shader: %s\n", log); + exit(1); + } + + return sh; +} + + +/** Make shader program from given vert/frag shader text */ +GLuint +PerfShaderProgram(const char *vertShader, const char *fragShader) +{ + GLuint prog; + GLint stat; + + { + const char *version = (const char *) glGetString(GL_VERSION); + if ((version[0] != '2' && + version[0] != '3') || version[1] != '.') { + fprintf(stderr, "Error: GL version 2.x or better required\n"); + exit(1); + } + } + + prog = glCreateProgram(); + + if (vertShader) { + GLuint vs = CompileShader(GL_VERTEX_SHADER, vertShader); + glAttachShader(prog, vs); + } + if (fragShader) { + GLuint fs = CompileShader(GL_FRAGMENT_SHADER, fragShader); + glAttachShader(prog, fs); + } + + glLinkProgram(prog); + glGetProgramiv(prog, GL_LINK_STATUS, &stat); + if (!stat) { + GLchar log[1000]; + GLsizei len; + glGetProgramInfoLog(prog, 1000, &len, log); + fprintf(stderr, "Shader link error:\n%s\n", log); + exit(1); + } + + return prog; +} + + +int +PerfReshapeWindow( unsigned w, unsigned h ) +{ + if (glutGet(GLUT_SCREEN_WIDTH) < w || + glutGet(GLUT_SCREEN_HEIGHT) < h) + return 0; + + glutReshapeWindow( w, h ); + glutPostRedisplay(); + return 1; +} + + +GLboolean +PerfExtensionSupported(const char *ext) +{ + return glutExtensionSupported(ext); +} + + static void Idle(void) { - Xrot += 3.0; - Yrot += 4.0; - Zrot += 2.0; - glutPostRedisplay(); + PerfNextRound(); } @@ -90,13 +210,6 @@ Key(unsigned char key, int x, int y) (void) x; (void) y; switch (key) { - case 'a': - Anim = !Anim; - if (Anim) - glutIdleFunc(Idle); - else - glutIdleFunc(NULL); - break; case 'z': Zrot -= step; break; @@ -141,15 +254,14 @@ main(int argc, char *argv[]) { glutInit(&argc, argv); 
glutInitWindowSize(WinWidth, WinHeight); - glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH | GLUT_STENCIL); Win = glutCreateWindow(argv[0]); glewInit(); glutReshapeFunc(Reshape); glutKeyboardFunc(Key); glutSpecialFunc(SpecialKey); glutDisplayFunc(Draw); - if (Anim) - glutIdleFunc(Idle); + glutIdleFunc(Idle); PerfInit(); glutMainLoop(); return 0; diff --git a/progs/perf/glmain.h b/progs/perf/glmain.h index fe11d7235e..d9bcd5f4e2 100644 --- a/progs/perf/glmain.h +++ b/progs/perf/glmain.h @@ -40,6 +40,18 @@ PerfGetTime(void); extern void PerfSwapBuffers(void); +extern GLuint +PerfCheckerTexture(GLsizei width, GLsizei height); + +extern GLuint +PerfShaderProgram(const char *vertShader, const char *fragShader); + +extern int +PerfReshapeWindow( unsigned w, unsigned h ); + +extern GLboolean +PerfExtensionSupported(const char *ext); + /** Test programs must implement these functions **/ @@ -47,6 +59,9 @@ extern void PerfInit(void); extern void +PerfNextRound(void); + +extern void PerfDraw(void); diff --git a/progs/perf/readpixels.c b/progs/perf/readpixels.c new file mode 100644 index 0000000000..ac7dc426e9 --- /dev/null +++ b/progs/perf/readpixels.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Measure glReadPixels speed. + * XXX also read into a PBO? + * XXX also read from FBOs? + * + * Brian Paul + * 23 Sep 2009 + */ + +#include <string.h> +#include <assert.h> +#include "glmain.h" +#include "common.h" + +int WinWidth = 1000, WinHeight = 1000; + +static GLuint VBO; + +static const GLboolean DrawPoint = GL_TRUE; +static const GLboolean BufferSubDataInHalves = GL_TRUE; + +static const GLfloat Vertex0[2] = { 0.0, 0.0 }; + +static GLenum HaveDepthStencil; + +static GLenum ReadFormat, ReadType; +static GLint ReadWidth, ReadHeight; +static GLvoid *ReadBuffer; + + +/** Called from test harness/main */ +void +PerfInit(void) +{ + /* setup VBO */ + glGenBuffersARB(1, &VBO); + glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(Vertex0), Vertex0, GL_STATIC_DRAW_ARB); + glVertexPointer(2, GL_FLOAT, sizeof(Vertex0), (void *) 0); + glEnableClientState(GL_VERTEX_ARRAY); + + glPixelStorei(GL_PACK_ALIGNMENT, 1); + + HaveDepthStencil = PerfExtensionSupported("GL_EXT_packed_depth_stencil"); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + glEnable(GL_DEPTH_TEST); + glEnable(GL_STENCIL_TEST); +} + + +static void +ReadPixels(unsigned count) +{ + unsigned i; + for (i = 0; i < count; i++) { + /* read from random pos */ + GLint x, y; + + x = WinWidth - ReadWidth; + y = WinHeight - ReadHeight; + if (x > 0) + x = rand() % x; + if (y > 0) + y = rand() % y; + + if (DrawPoint) + glDrawArrays(GL_POINTS, 0, 1); + + glReadPixels(x, y, ReadWidth, 
ReadHeight, + ReadFormat, ReadType, ReadBuffer); + } + glFinish(); +} + + +static const GLsizei Sizes[] = { + 10, + 100, + 500, + 1000, + 0 +}; + + +static const struct { + GLenum format; + GLenum type; + const char *name; + GLuint pixel_size; +} DstFormats[] = { + { GL_RGBA, GL_UNSIGNED_BYTE, "RGBA/ubyte", 4 }, + { GL_BGRA, GL_UNSIGNED_BYTE, "BGRA/ubyte", 4 }, + { GL_RGB, GL_UNSIGNED_SHORT_5_6_5, "RGB/565", 2 }, + { GL_LUMINANCE, GL_UNSIGNED_BYTE, "L/ubyte", 1 }, + { GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, "Z/uint", 4 }, + { GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, "Z+S/uint", 4 }, + { 0, 0, NULL, 0 } +}; + + + +/** Called from test harness/main */ +void +PerfNextRound(void) +{ +} + + +/** Called from test harness/main */ +void +PerfDraw(void) +{ + double rate, mbPerSec; + int fmt, sz; + + /* loop over formats */ + for (fmt = 0; DstFormats[fmt].format; fmt++) { + ReadFormat = DstFormats[fmt].format; + ReadType = DstFormats[fmt].type; + + /* loop over sizes */ + for (sz = 0; Sizes[sz]; sz++) { + int imgSize; + + ReadWidth = ReadHeight = Sizes[sz]; + imgSize = ReadWidth * ReadHeight * DstFormats[fmt].pixel_size; + ReadBuffer = malloc(imgSize); + + if (ReadFormat == GL_DEPTH_STENCIL_EXT && !HaveDepthStencil) { + rate = 0.0; + mbPerSec = 0.0; + } + else { + rate = PerfMeasureRate(ReadPixels); + mbPerSec = rate * imgSize / (1024.0 * 1024.0); + } + + perf_printf("glReadPixels(%d x %d, %s): %.1f images/sec, %.1f Mpixels/sec\n", + ReadWidth, ReadHeight, + DstFormats[fmt].name, rate, mbPerSec); + + free(ReadBuffer); + } + } + + exit(0); +} diff --git a/progs/perf/swapbuffers.c b/progs/perf/swapbuffers.c new file mode 100644 index 0000000000..63c7fc06f9 --- /dev/null +++ b/progs/perf/swapbuffers.c @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Measure SwapBuffers. 
+ *
+ * Keith Whitwell
+ * 22 Sep 2009
+ */
+
+#include <stdlib.h>
+#include "glmain.h"
+#include "common.h"
+
+int WinWidth = 100, WinHeight = 100;
+int real_WinWidth, real_WinHeight; /* size last set via PerfReshapeWindow() */
+
+static GLuint VBO;
+
+struct vertex
+{
+   GLfloat x, y;
+};
+
+static const struct vertex vertices[4] = {
+   { -1.0, -1.0 },
+   { 1.0, -1.0 },
+   { 1.0, 1.0 },
+   { -1.0, 1.0 }
+};
+
+
+/** Called from test harness/main */
+void
+PerfInit(void)
+{
+   /* setup VBO w/ vertex data */
+   glGenBuffersARB(1, &VBO);
+   glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO);
+   glBufferDataARB(GL_ARRAY_BUFFER_ARB,
+                   sizeof(vertices), vertices, GL_STATIC_DRAW_ARB);
+   glVertexPointer(2, GL_FLOAT, sizeof(struct vertex), (void *) 0);
+   glEnableClientState(GL_VERTEX_ARRAY);
+
+   /* misc GL state */
+   glAlphaFunc(GL_ALWAYS, 0.0);
+}
+
+static void
+SwapNaked(unsigned count)
+{
+   unsigned i;
+   for (i = 0; i < count; i++) {
+      PerfSwapBuffers();
+   }
+}
+
+
+static void
+SwapClear(unsigned count)
+{
+   unsigned i;
+   for (i = 0; i < count; i++) {
+      glClear(GL_COLOR_BUFFER_BIT);
+      PerfSwapBuffers();
+   }
+}
+
+static void
+SwapClearPoint(unsigned count)
+{
+   unsigned i;
+   for (i = 0; i < count; i++) {
+      glClear(GL_COLOR_BUFFER_BIT);
+      glDrawArrays(GL_POINTS, 0, 4);
+      PerfSwapBuffers();
+   }
+}
+
+
+static const struct {
+   unsigned w;
+   unsigned h;
+} sizes[] = {
+   { 320, 240 },
+   { 640, 480 },
+   { 1024, 768 },
+   { 1200, 1024 },
+   { 1600, 1200 }
+};
+
+/** Called from test harness/main: reshape to the next sizes[] entry or exit */
+void
+PerfNextRound(void)
+{
+   static unsigned i;
+
+   if (i < sizeof(sizes) / sizeof(sizes[0]) &&
+       PerfReshapeWindow( sizes[i].w, sizes[i].h ))
+   {
+      perf_printf("Reshape %ux%u\n", sizes[i].w, sizes[i].h);
+      real_WinWidth = sizes[i].w;
+      real_WinHeight = sizes[i].h;
+      i++;
+   }
+   else {
+      exit(0);
+   }
+}
+
+
+
+/** Called from test harness/main */
+void
+PerfDraw(void)
+{
+   double rate0;
+
+   rate0 = PerfMeasureRate(SwapNaked);
+   perf_printf(" Swapbuffers %dx%d: %s swaps/second",
+               real_WinWidth, real_WinHeight,
+               PerfHumanFloat(rate0));
+   perf_printf(" %s pixels/second\n",
+               PerfHumanFloat(rate0 * real_WinWidth * real_WinHeight));
+
+
+
+   rate0 = PerfMeasureRate(SwapClear);
+   perf_printf(" Swap/Clear %dx%d: %s swaps/second",
+               real_WinWidth, real_WinHeight,
+               PerfHumanFloat(rate0));
+   perf_printf(" %s pixels/second\n",
+               PerfHumanFloat(rate0 * real_WinWidth * real_WinHeight));
+
+
+   rate0 = PerfMeasureRate(SwapClearPoint);
+   perf_printf(" Swap/Clear/Draw %dx%d: %s swaps/second",
+               real_WinWidth, real_WinHeight,
+               PerfHumanFloat(rate0));
+   perf_printf(" %s pixels/second\n",
+               PerfHumanFloat(rate0 * real_WinWidth * real_WinHeight));
+}
+
diff --git a/progs/perf/teximage.c b/progs/perf/teximage.c
index 11d781fccc..a3005d0bef 100644
--- a/progs/perf/teximage.c
+++ b/progs/perf/teximage.c
@@ -20,7 +20,7 @@
  */
 
 /**
- * Measure glTexSubImage2D rate
+ * Measure glTex[Sub]Image2D() and glGetTexImage() rate
  *
  * Brian Paul
  * 16 Sep 2009
@@ -36,10 +36,28 @@ static GLuint VBO;
 static GLuint TexObj = 0;
 static GLubyte *TexImage = NULL;
 static GLsizei TexSize;
-static GLenum TexSrcFormat, TexSrcType;
+static GLenum TexIntFormat, TexSrcFormat, TexSrcType;
 
 static const GLboolean DrawPoint = GL_TRUE;
-static const GLboolean TexSubImage4 = GL_TRUE;
+static const GLboolean TexSubImage4 = GL_FALSE;
+
+enum {
+   MODE_CREATE_TEXIMAGE,
+   MODE_TEXIMAGE,
+   MODE_TEXSUBIMAGE,
+   MODE_GETTEXIMAGE,
+   MODE_COUNT
+};
+
+static const char *mode_name[MODE_COUNT] =
+{
+   "Create_TexImage",
+   "TexImage",
+   "TexSubImage",
+   "GetTexImage"
+};
+
+
 
 struct vertex
 {
@@ -50,11 +68,8 @@ static const struct vertex vertices[1] = {
    { 0.0, 0.0, 0.5, 0.5 },
 };
 
-#if 0
 #define VOFFSET(F) ((void *) offsetof(struct vertex, F))
-#else
-#define VOFFSET(F) ((void *) &((struct vertex *)NULL)->F)
-#endif
+
 
 /** Called from test harness/main */
 void
@@ -79,6 +94,32 @@ PerfInit(void)
 }
 
 
+
+
+static void
+CreateUploadTexImage2D(unsigned count)
+{
+   unsigned i;
+   for (i = 0; i < count; i++) {
+      if (TexObj)
+         glDeleteTextures(1, &TexObj);
+
+      glGenTextures(1, &TexObj);
+      glBindTexture(GL_TEXTURE_2D, TexObj);
+      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+      glTexImage2D(GL_TEXTURE_2D, 0, TexIntFormat,
+                   TexSize, TexSize, 0,
+                   TexSrcFormat, TexSrcType, TexImage);
+
+      if (DrawPoint)
+         glDrawArrays(GL_POINTS, 0, 1);
+   }
+   glFinish();
+}
+
+
 static void
 UploadTexImage2D(unsigned count)
 {
@@ -89,7 +130,7 @@ UploadTexImage2D(unsigned count)
        * in Mesa but may be optimized in other drivers. Note sure how
        * much difference that might make.
        */
-      glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
+      glTexImage2D(GL_TEXTURE_2D, 0, TexIntFormat,
                    TexSize, TexSize, 0,
                    TexSrcFormat, TexSrcType, TexImage);
       if (DrawPoint)
@@ -132,8 +173,8 @@ UploadTexSubImage2D(unsigned count)
                          TexSrcFormat, TexSrcType, TexImage);
          /* reset the unpacking state */
          glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
-         glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
          glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
+         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
       }
       else {
          /* replace whole texture image at once */
@@ -148,17 +189,43 @@ UploadTexSubImage2D(unsigned count)
 }
 
 
+static void
+GetTexImage2D(unsigned count)
+{
+   unsigned i;
+   GLubyte *buf = (GLubyte *) malloc(TexSize * TexSize * 4);
+   for (i = 0; i < count; i++) {
+      glGetTexImage(GL_TEXTURE_2D, 0,
+                    TexSrcFormat, TexSrcType, buf);
+   }
+   glFinish();
+   free(buf);
+}
+
+
 /* XXX any other formats to measure?
*/
 static const struct {
    GLenum format, type;
+   GLenum internal_format;
    const char *name;
+   GLuint texel_size;
+   GLboolean full_test;
 } SrcFormats[] = {
-   { GL_RGBA, GL_UNSIGNED_BYTE, "GL_RGBA/GLubyte" },
-   { GL_BGRA, GL_UNSIGNED_BYTE, "GL_BGRA/GLubyte" },
-   { 0, 0, NULL }
+   { GL_RGBA, GL_UNSIGNED_BYTE, GL_RGBA, "RGBA/ubyte", 4, GL_TRUE },
+   { GL_RGB, GL_UNSIGNED_BYTE, GL_RGB, "RGB/ubyte", 3, GL_FALSE },
+   { GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_RGB, "RGB/565", 2, GL_FALSE },
+   { GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA, "BGRA/ubyte", 4, GL_FALSE },
+   { GL_LUMINANCE, GL_UNSIGNED_BYTE, GL_LUMINANCE, "L/ubyte", 1, GL_FALSE },
+   { 0, 0, 0, NULL, 0, 0 }
 };
 
 
+/** Called from test harness/main */
+void
+PerfNextRound(void)
+{
+}
+
 
 /** Called from test harness/main */
 void
@@ -166,46 +233,97 @@ PerfDraw(void)
 {
    GLint maxSize;
    double rate;
-   GLint fmt, subImage;
+   GLint fmt, mode;
 
    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxSize);
 
    /* loop over source data formats */
    for (fmt = 0; SrcFormats[fmt].format; fmt++) {
+      TexIntFormat = SrcFormats[fmt].internal_format;
       TexSrcFormat = SrcFormats[fmt].format;
       TexSrcType = SrcFormats[fmt].type;
 
-      /* loop over glTexImage, glTexSubImage */
-      for (subImage = 0; subImage < 2; subImage++) {
+      /* loop over the test modes named in mode_name[] */
+      for (mode = 0; mode < MODE_COUNT; mode++) {
+         GLsizei minsz, maxsz;   /* same signedness as TexSize */
 
-         /* loop over texture sizes */
-         for (TexSize = 16; TexSize <= maxSize; TexSize *= 2) {
-            GLint bytesPerImage;
+         if (SrcFormats[fmt].full_test) {
+            minsz = 16;
+            maxsz = 4096;
+         }
+         else {
+            minsz = maxsz = 256;
+            if (mode == MODE_CREATE_TEXIMAGE)
+               continue;
+         }
+
+         /* loop over a defined range of texture sizes, test only the
+          * ones which are legal for this driver.
+          */
+         for (TexSize = minsz; TexSize <= maxsz; TexSize *= 4) {
             double mbPerSec;
 
-            bytesPerImage = TexSize * TexSize * 4;
-            TexImage = malloc(bytesPerImage);
+            if (TexSize <= maxSize) {
+               GLint bytesPerImage;
+
+               bytesPerImage = TexSize * TexSize * SrcFormats[fmt].texel_size;
+               TexImage = malloc(bytesPerImage);
+               if (!TexImage)
+                  exit(1);   /* out of memory: nothing to upload from */
+               switch (mode) {
+               case MODE_TEXIMAGE:
+                  rate = PerfMeasureRate(UploadTexImage2D);
+                  break;
+
+               case MODE_CREATE_TEXIMAGE:
+                  rate = PerfMeasureRate(CreateUploadTexImage2D);
+                  break;
+
+               case MODE_TEXSUBIMAGE:
+                  /* create initial, empty texture */
+                  glTexImage2D(GL_TEXTURE_2D, 0, TexIntFormat,
+                               TexSize, TexSize, 0,
+                               TexSrcFormat, TexSrcType, NULL);
+                  rate = PerfMeasureRate(UploadTexSubImage2D);
+                  break;
+
+               case MODE_GETTEXIMAGE:
+                  glTexImage2D(GL_TEXTURE_2D, 0, TexIntFormat,
+                               TexSize, TexSize, 0,
+                               TexSrcFormat, TexSrcType, TexImage);
+                  rate = PerfMeasureRate(GetTexImage2D);
+                  break;
+
+               default:
+                  exit(1);
+               }
+
+               mbPerSec = rate * bytesPerImage / (1024.0 * 1024.0);
+               free(TexImage);
+
+               {
+                  unsigned err;
+                  err = glGetError();
+                  if (err) {
+                     perf_printf("non-zero glGetError() 0x%x\n", err);
+                     exit(1);
+                  }
+               }
 
-            if (subImage) {
-               /* create initial, empty texture */
-               glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
-                            TexSize, TexSize, 0,
-                            TexSrcFormat, TexSrcType, NULL);
-               rate = PerfMeasureRate(UploadTexSubImage2D);
             }
             else {
-               rate = PerfMeasureRate(UploadTexImage2D);
+               rate = 0;
+               mbPerSec = 0;
             }
 
-            mbPerSec = rate * bytesPerImage / (1024.0 * 1024.0);
-
-            perf_printf(" glTex%sImage2D(%s %d x %d): "
+            perf_printf(" %s(%s %d x %d): "
                         "%.1f images/sec, %.1f MB/sec\n",
-                        (subImage ? "Sub" : ""),
+                        mode_name[mode],
                         SrcFormats[fmt].name, TexSize, TexSize, rate, mbPerSec);
-
-            free(TexImage);
          }
+
+         if (SrcFormats[fmt].full_test)
+            perf_printf("\n");
       }
    }
 
diff --git a/progs/perf/vbo.c b/progs/perf/vbo.c
index d2630796ae..b326c056ec 100644
--- a/progs/perf/vbo.c
+++ b/progs/perf/vbo.c
@@ -31,13 +31,17 @@
 #include "glmain.h"
 #include "common.h"
 
+/* Copy data out of a large array to avoid caching effects:
+ */
+#define DATA_SIZE (16*1024*1024)
 
 int WinWidth = 100, WinHeight = 100;
 
 static GLuint VBO;
 
 static GLsizei VBOSize = 0;
-static GLubyte *VBOData = NULL;
+static GLsizei SubSize = 0;
+static GLubyte *VBOData = NULL; /* array[DATA_SIZE] */
 
 static const GLboolean DrawPoint = GL_TRUE;
 static const GLboolean BufferSubDataInHalves = GL_TRUE;
@@ -61,11 +65,23 @@ static void
 UploadVBO(unsigned count)
 {
    unsigned i;
+   unsigned total = 0;
+   unsigned src = 0;
+
    for (i = 0; i < count; i++) {
-      glBufferDataARB(GL_ARRAY_BUFFER, VBOSize, VBOData, GL_STREAM_DRAW_ARB);
+      glBufferDataARB(GL_ARRAY_BUFFER, VBOSize, VBOData + src, GL_STREAM_DRAW_ARB);
+      glDrawArrays(GL_POINTS, 0, 1);
+
+      /* Throw in an occasional flush to work around a driver crash:
+       */
+      total += VBOSize;
+      if (total >= 16*1024*1024) {
+         glFlush();
+         total = 0;
+      }
 
-      if (DrawPoint)
-         glDrawArrays(GL_POINTS, 0, 1);
+      src += VBOSize;
+      src %= DATA_SIZE;
    }
    glFinish();
 }
@@ -75,18 +91,69 @@ static void
 UploadSubVBO(unsigned count)
 {
    unsigned i;
+   unsigned src = 0;
+
    for (i = 0; i < count; i++) {
-      if (BufferSubDataInHalves) {
-         GLsizei half = VBOSize / 2;
-         glBufferSubDataARB(GL_ARRAY_BUFFER, 0, half, VBOData);
-         glBufferSubDataARB(GL_ARRAY_BUFFER, half, half, VBOData + half);
+      unsigned offset = (i * SubSize) % VBOSize;
+      glBufferSubDataARB(GL_ARRAY_BUFFER, offset, SubSize, VBOData + src);
+
+      if (DrawPoint) {
+         glDrawArrays(GL_POINTS, offset / sizeof(Vertex0), 1);
       }
-      else {
-         glBufferSubDataARB(GL_ARRAY_BUFFER, 0, VBOSize, VBOData);
+
+      src += SubSize;
+      src %= DATA_SIZE;
+   }
+   glFinish();
+}
+
+
+/*
Do multiple small SubData uploads, then call DrawArrays. This may be a
+ * fairer comparison to back-to-back BufferData calls:
+ */
+static void
+BatchUploadSubVBO(unsigned count)
+{
+   unsigned i = 0, j;
+   unsigned period = VBOSize / SubSize;
+   unsigned src = 0;
+
+   while (i < count) {
+      for (j = 0; j < period && i < count; j++, i++) {
+         unsigned offset = j * SubSize;
+         glBufferSubDataARB(GL_ARRAY_BUFFER, offset, SubSize, VBOData + src);
       }
 
-      if (DrawPoint)
-         glDrawArrays(GL_POINTS, 0, 1);
+      glDrawArrays(GL_POINTS, 0, 1);
+
+      src += SubSize;
+      src %= DATA_SIZE;
+   }
+   glFinish();
+}
+
+
+/**
+ * Test the sequence:
+ * create/load VBO
+ * draw
+ * destroy VBO
+ */
+static void
+CreateDrawDestroyVBO(unsigned count)
+{
+   unsigned i;
+   for (i = 0; i < count; i++) {
+      GLuint vbo;
+      /* create/load */
+      glGenBuffersARB(1, &vbo);
+      glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo);
+      glBufferDataARB(GL_ARRAY_BUFFER, VBOSize, VBOData, GL_STREAM_DRAW_ARB);
+      /* draw */
+      glVertexPointer(2, GL_FLOAT, sizeof(Vertex0), (void *) 0);
+      glDrawArrays(GL_POINTS, 0, 1);
+      /* destroy */
+      glDeleteBuffersARB(1, &vbo);
    }
    glFinish();
 }
@@ -102,36 +169,77 @@ static const GLsizei Sizes[] = {
    0 /* end of list */
 };
 
+void
+PerfNextRound(void)
+{
+}
 
 /** Called from test harness/main */
 void
 PerfDraw(void)
 {
    double rate, mbPerSec;
-   int sub, sz;
+   int i, sz;
 
-   /* loop over whole/sub buffer upload */
-   for (sub = 0; sub < 2; sub++) {
+   /* Load VBOData buffer with duplicated Vertex0.
+    */
+   VBOData = calloc(DATA_SIZE, 1);
+   if (!VBOData)
+      exit(1);   /* out of memory: every test below reads VBOData */
 
-      /* loop over VBO sizes */
-      for (sz = 0; Sizes[sz]; sz++) {
-         VBOSize = Sizes[sz];
+   for (i = 0; i < (int) (DATA_SIZE / sizeof(Vertex0)); i++) {
+      memcpy(VBOData + i * sizeof(Vertex0), Vertex0, sizeof(Vertex0));
+   }
 
-         VBOData = malloc(VBOSize);
-         memcpy(VBOData, Vertex0, sizeof(Vertex0));
+   /* glBufferDataARB()
+    */
+   for (sz = 0; Sizes[sz]; sz++) {
+      SubSize = VBOSize = Sizes[sz];
+      rate = PerfMeasureRate(UploadVBO);
+      mbPerSec = rate * VBOSize / (1024.0 * 1024.0);
+      perf_printf(" glBufferDataARB(size = %d): %.1f MB/sec\n",
+                  VBOSize, mbPerSec);
+   }
 
-         if (sub)
-            rate = PerfMeasureRate(UploadSubVBO);
-         else
-            rate = PerfMeasureRate(UploadVBO);
+   /* glBufferSubDataARB()
+    */
+   for (sz = 0; Sizes[sz]; sz++) {
+      SubSize = VBOSize = Sizes[sz];
+      rate = PerfMeasureRate(UploadSubVBO);
+      mbPerSec = rate * VBOSize / (1024.0 * 1024.0);
+      perf_printf(" glBufferSubDataARB(size = %d): %.1f MB/sec\n",
+                  VBOSize, mbPerSec);
+   }
 
-         mbPerSec = rate * VBOSize / (1024.0 * 1024.0);
+   /* Batch upload
+    */
+   VBOSize = 1024 * 1024;
+   glBufferDataARB(GL_ARRAY_BUFFER, VBOSize, VBOData, GL_STREAM_DRAW_ARB);
+
+   for (sz = 0; Sizes[sz] && Sizes[sz] < VBOSize; sz++) {
+      SubSize = Sizes[sz];
+      rate = PerfMeasureRate(UploadSubVBO);
+      mbPerSec = rate * SubSize / (1024.0 * 1024.0);
+      perf_printf(" glBufferSubDataARB(size = %d, VBOSize = %d): %.1f MB/sec\n",
+                  SubSize, VBOSize, mbPerSec);
+   }
 
-         perf_printf(" glBuffer%sDataARB(size = %d): %.1f MB/sec\n",
-                     (sub ? "Sub" : ""), VBOSize, mbPerSec);
+   for (sz = 0; Sizes[sz] && Sizes[sz] < VBOSize; sz++) {
+      SubSize = Sizes[sz];
+      rate = PerfMeasureRate(BatchUploadSubVBO);
+      mbPerSec = rate * SubSize / (1024.0 * 1024.0);
+      perf_printf(" glBufferSubDataARB(size = %d, VBOSize = %d), batched: %.1f MB/sec\n",
+                  SubSize, VBOSize, mbPerSec);
+   }
 
-         free(VBOData);
-      }
+   /* Create/Draw/Destroy
+    */
+   for (sz = 0; Sizes[sz]; sz++) {
+      SubSize = VBOSize = Sizes[sz];
+      rate = PerfMeasureRate(CreateDrawDestroyVBO);
+      mbPerSec = rate * VBOSize / (1024.0 * 1024.0);
+      perf_printf(" VBO Create/Draw/Destroy(size = %d): %.1f MB/sec, %.1f draws/sec\n",
+                  VBOSize, mbPerSec, rate);
    }
 
    exit(0);
diff --git a/progs/perf/vertexrate.c b/progs/perf/vertexrate.c
index b84b22a4c4..b5355525d0 100644
--- a/progs/perf/vertexrate.c
+++ b/progs/perf/vertexrate.c
@@ -228,6 +228,11 @@ DrawRangeElementsBO(unsigned count)
    PerfSwapBuffers();
 }
 
+void
+PerfNextRound(void)
+{
+}
+
 
 /** Called from test harness/main */
 void
@@ -241,31 +246,31 @@ PerfDraw(void)
 
    rate = PerfMeasureRate(DrawImmediate);
    rate *= NumVerts;
-   perf_printf(" Immediate mode: %.1f verts/sec\n", rate);
+   perf_printf(" Immediate mode: %s verts/sec\n", PerfHumanFloat(rate));
 
    rate = PerfMeasureRate(DrawArraysMem);
    rate *= NumVerts;
-   perf_printf(" glDrawArrays: %.1f verts/sec\n", rate);
+   perf_printf(" glDrawArrays: %s verts/sec\n", PerfHumanFloat(rate));
 
    rate = PerfMeasureRate(DrawArraysVBO);
    rate *= NumVerts;
-   perf_printf(" VBO glDrawArrays: %.1f verts/sec\n", rate);
+   perf_printf(" VBO glDrawArrays: %s verts/sec\n", PerfHumanFloat(rate));
 
    rate = PerfMeasureRate(DrawElementsMem);
    rate *= NumVerts;
-   perf_printf(" glDrawElements: %.1f verts/sec\n", rate);
+   perf_printf(" glDrawElements: %s verts/sec\n", PerfHumanFloat(rate));
 
    rate = PerfMeasureRate(DrawElementsBO);
    rate *= NumVerts;
-   perf_printf(" VBO glDrawElements: %.1f verts/sec\n", rate);
+   perf_printf(" VBO glDrawElements: %s verts/sec\n", PerfHumanFloat(rate));
 
    rate = PerfMeasureRate(DrawRangeElementsMem);
rate *= NumVerts; - perf_printf(" glDrawRangeElements: %.1f verts/sec\n", rate); + perf_printf(" glDrawRangeElements: %s verts/sec\n", PerfHumanFloat(rate)); rate = PerfMeasureRate(DrawRangeElementsBO); rate *= NumVerts; - perf_printf(" VBO glDrawRangeElements: %.1f verts/sec\n", rate); + perf_printf(" VBO glDrawRangeElements: %s verts/sec\n", PerfHumanFloat(rate)); exit(0); } |