From 48e6fff3a9d97fffa6d2dd63e63ffe94c25d2d96 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 1 Nov 2006 14:26:10 +0000 Subject: merge the (rest of) texmem branch --- progs/demos/Makefile | 1 + progs/demos/streaming_rect.c | 322 +++++++++++++++++++++++++++++++++++++++++++ progs/demos/texdown.c | 133 ++++++++++++++---- 3 files changed, 429 insertions(+), 27 deletions(-) create mode 100644 progs/demos/streaming_rect.c (limited to 'progs/demos') diff --git a/progs/demos/Makefile b/progs/demos/Makefile index 43d0f17c85..feb2abd6cf 100644 --- a/progs/demos/Makefile +++ b/progs/demos/Makefile @@ -47,6 +47,7 @@ PROGS = \ renormal \ shadowtex \ singlebuffer \ + streaming_rect \ spectex \ spriteblast \ stex3d \ diff --git a/progs/demos/streaming_rect.c b/progs/demos/streaming_rect.c new file mode 100644 index 0000000000..86e00803c0 --- /dev/null +++ b/progs/demos/streaming_rect.c @@ -0,0 +1,322 @@ + +/* + * GL_ARB_multitexture demo + * + * Command line options: + * -info print GL implementation information + * + * + * Brian Paul November 1998 This program is in the public domain. + * Modified on 12 Feb 2002 for > 2 texture units. + */ + +#define GL_GLEXT_PROTOTYPES + +#include +#include +#include +#include +#include + +#include "readtex.h" + + +#define ANIMATE 10 +#define PBO 11 +#define QUIT 100 + +static GLboolean Animate = GL_TRUE; +static GLboolean use_pbo = 1; +static GLboolean whole_rect = 1; + +static GLfloat Drift = 0.0; +static GLfloat drift_increment = 1/255.0; +static GLfloat Xrot = 20.0, Yrot = 30.0; + +static GLuint Width = 1024; +static GLuint Height = 512; + + +static void Idle( void ) +{ + if (Animate) { + + Drift += drift_increment; + if (Drift >= 1.0) + Drift = 0.0; + + glutPostRedisplay(); + } +} + +static int max( int a, int b ) { return a > b ? a : b; } +static int min( int a, int b ) { return a < b ? a : b; } + +static void DrawObject() +{ + GLint size = Width * Height * 4; + + if (use_pbo) { + /* XXX: This is extremely important - semantically makes the buffer + * contents undefined, but in practice means that the driver can + * release the old copy of the texture and allocate a new one + * without waiting for outstanding rendering to complete. + */ + glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_EXT, size, NULL, GL_STREAM_DRAW_ARB); + + { + char *image = glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_EXT, GL_WRITE_ONLY_ARB); + + printf("char %d\n", (unsigned char)(Drift * 255)); + + memset(image, size, (unsigned char)(Drift * 255)); + + glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_EXT); + } + + + /* BGRA is required for most hardware paths: + */ + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, Width, Height, 0, + GL_BGRA, GL_UNSIGNED_BYTE, NULL); + } + else { + static char *image = NULL; + + if (image == NULL) + image = malloc(size); + + memset(image, size, (unsigned char)(Drift * 255)); + + /* BGRA should be the fast path for regular uploads as well. + */ + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, Width, Height, 0, + GL_BGRA, GL_UNSIGNED_BYTE, image); + } + + { + int x,y,w,h; + + if (whole_rect) { + x = y = 0; + w = Width; + h = Height; + } + else { + x = y = 0; + w = min(10, Width); + h = min(10, Height); + } + + glBegin(GL_QUADS); + + glTexCoord2f( x, y); + glVertex2f( x, y ); + + glTexCoord2f( x, y + h); + glVertex2f( x, y + h); + + glTexCoord2f( x + w + .5, y + h); + glVertex2f( x + w, y + h ); + + glTexCoord2f( x + w, y + .5); + glVertex2f( x + w, y ); + + glEnd(); + } +} + + + +static void Display( void ) +{ + static GLint T0 = 0; + static GLint Frames = 0; + GLint t; + + glClear( GL_COLOR_BUFFER_BIT ); + + glPushMatrix(); + DrawObject(); + glPopMatrix(); + + glutSwapBuffers(); + + Frames++; + + t = glutGet(GLUT_ELAPSED_TIME); + if (t - T0 >= 1000) { + GLfloat seconds = (t - T0) / 1000.0; + + GLfloat fps = Frames / seconds; + printf("%d frames in %6.3f seconds = %6.3f FPS\n", Frames, seconds, fps); + + drift_increment = 2.2 * seconds / Frames; + T0 = t; + Frames = 0; + } +} + + +static void Reshape( int width, int height ) +{ + glViewport( 0, 0, width, height ); + glMatrixMode( GL_PROJECTION ); + glLoadIdentity(); +/* glFrustum( -1.0, 1.0, -1.0, 1.0, 10.0, 100.0 ); */ + gluOrtho2D( 0, width, height, 0 ); + glMatrixMode( GL_MODELVIEW ); + glLoadIdentity(); + glTranslatef(0.375, 0.375, 0); +} + + +static void ModeMenu(int entry) +{ + if (entry==ANIMATE) { + Animate = !Animate; + } + else if (entry==PBO) { + use_pbo = !use_pbo; + } + else if (entry==QUIT) { + exit(0); + } + + glutPostRedisplay(); +} + + +static void Key( unsigned char key, int x, int y ) +{ + (void) x; + (void) y; + switch (key) { + case 27: + exit(0); + break; + } + glutPostRedisplay(); +} + + +static void SpecialKey( int key, int x, int y ) +{ + float step = 3.0; + (void) x; + (void) y; + + switch (key) { + case GLUT_KEY_UP: + Xrot += step; + break; + case GLUT_KEY_DOWN: + Xrot -= step; + break; + case GLUT_KEY_LEFT: + Yrot += step; + break; + case GLUT_KEY_RIGHT: + Yrot -= step; + break; + } + glutPostRedisplay(); +} + + +static void Init( int argc, char *argv[] ) +{ + const char *exten = (const char *) glGetString(GL_EXTENSIONS); + GLuint texObj, DrawPBO; + GLint size; + + + if (!strstr(exten, "GL_ARB_multitexture")) { + printf("Sorry, GL_ARB_multitexture not supported by this renderer.\n"); + exit(1); + } + + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &size); + printf("%d x %d max texture size\n", size, size); + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + /* allocate two texture objects */ + glGenTextures(1, &texObj); + + /* setup the texture objects */ + glActiveTextureARB(GL_TEXTURE0_ARB); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texObj); + + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + glGenBuffersARB(1, &DrawPBO); + + glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_EXT, DrawPBO); + glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_EXT, + Width * Height * 4, NULL, GL_STREAM_DRAW); + + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + + glEnable(GL_TEXTURE_RECTANGLE_ARB); + + glShadeModel(GL_SMOOTH); + glClearColor(0.3, 0.3, 0.4, 1.0); + + if (argc > 1 && strcmp(argv[1], "-info")==0) { + printf("GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER)); + printf("GL_VERSION = %s\n", (char *) glGetString(GL_VERSION)); + printf("GL_VENDOR = %s\n", (char *) glGetString(GL_VENDOR)); + printf("GL_EXTENSIONS = %s\n", (char *) glGetString(GL_EXTENSIONS)); + } +} + + +int main( int argc, char *argv[] ) +{ + GLint i; + + glutInit( &argc, argv ); + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "-w") == 0) { + Width = atoi(argv[i+1]); + if (Width <= 0) { + printf("Error, bad width\n"); + exit(1); + } + i++; + } + else if (strcmp(argv[i], "-h") == 0) { + Height = atoi(argv[i+1]); + if (Height <= 0) { + printf("Error, bad height\n"); + exit(1); + } + i++; + } + } + + glutInitWindowSize( Width, Height ); + glutInitWindowPosition( 0, 0 ); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + glutCreateWindow(argv[0] ); + + Init( argc, argv ); + + glutReshapeFunc( Reshape ); + glutKeyboardFunc( Key ); + glutSpecialFunc( SpecialKey ); + glutDisplayFunc( Display ); + glutIdleFunc( Idle ); + + glutCreateMenu(ModeMenu); + glutAddMenuEntry("Toggle Animation", ANIMATE); + glutAddMenuEntry("Toggle PBO", PBO); + glutAddMenuEntry("Quit", QUIT); + glutAttachMenu(GLUT_RIGHT_BUTTON); + + glutMainLoop(); + return 0; +} diff --git a/progs/demos/texdown.c b/progs/demos/texdown.c index 79525a0395..fc98fddb31 100644 --- a/progs/demos/texdown.c +++ b/progs/demos/texdown.c @@ -38,8 +38,8 @@ #include -static GLsizei MaxSize = 1024; -static GLsizei TexWidth = 256, TexHeight = 256, TexBorder = 0; +static GLsizei MaxSize = 2048; +static GLsizei TexWidth = 1024, TexHeight = 1024, TexBorder = 0; static GLboolean ScaleAndBias = GL_FALSE; static GLboolean SubImage = GL_FALSE; static GLdouble DownloadRate = 0.0; /* texels/sec */ @@ -47,6 +47,32 @@ static GLdouble DownloadRate = 0.0; /* texels/sec */ static GLuint Mode = 0; +/* Try and avoid L2 cache effects by cycling through a small number of + * textures. + * + * At the initial size of 1024x1024x4 == 4mbyte, say 8 textures will + * keep us out of most caches at 32mb total. + * + * This turns into a fairly interesting question of what exactly you + * expect to be in cache in normal usage, and what you think should be + * outside. There's no rules for this, no reason to favour one usage + * over another except what the application you care about happens to + * resemble most closely. + * + * - Should the client texture image be in L2 cache? Has it just been + * generated or read from disk? + * - Does the application really use >1 texture, or is it constantly + * updating one image in-place? + * + * Different answers will favour different texture upload mechanisms. + * To upload an image that is purely outside of cache, a DMA-based + * upload will probably win, whereas for small, in-cache textures, + * copying looks good. + */ +#define NR_TEXOBJ 4 +static GLuint TexObj[NR_TEXOBJ]; + + struct FormatRec { GLenum Format; GLenum Type; @@ -116,25 +142,57 @@ TypeStr(GLenum type) } } +/* On x86, there is a performance cliff for memcpy to texture memory + * for sources below 64 byte alignment. We do our best with this in + * the driver, but it is better if the images are correctly aligned to + * start with: + */ +#define ALIGN (1<<12) + +static unsigned align(unsigned value, unsigned a) +{ + return (value + a - 1) & ~(a-1); +} + +static int MIN2(int a, int b) +{ + return a < b ? a : b; +} static void MeasureDownloadRate(void) { const int w = TexWidth + 2 * TexBorder; const int h = TexHeight + 2 * TexBorder; - const int bytes = w * h * BytesPerTexel(Format); + const int image_bytes = align(w * h * BytesPerTexel(Format), ALIGN); + const int bytes = image_bytes * NR_TEXOBJ; + GLubyte *orig_texImage, *orig_getImage; GLubyte *texImage, *getImage; GLdouble t0, t1, time; int count; int i; + int offset = 0; + GLdouble total = 0; /* ints will tend to overflow */ + + printf("allocating %d bytes for %d %dx%d images\n", + bytes, NR_TEXOBJ, w, h); - texImage = (GLubyte *) malloc(bytes); - getImage = (GLubyte *) malloc(bytes); - if (!texImage || !getImage) { + orig_texImage = (GLubyte *) malloc(bytes + ALIGN); + orig_getImage = (GLubyte *) malloc(image_bytes + ALIGN); + if (!orig_texImage || !orig_getImage) { DownloadRate = 0.0; return; } + printf("alloc %p %p\n", orig_texImage, orig_getImage); + + texImage = (GLubyte *)align((unsigned)orig_texImage, ALIGN); + getImage = (GLubyte *)align((unsigned)orig_getImage, ALIGN); + + for (i = 1; !(((unsigned)texImage) & i); i<<=1) + ; + printf("texture image alignment: %d bytes (%p)\n", i, texImage); + for (i = 0; i < bytes; i++) { texImage[i] = i & 0xff; } @@ -166,16 +224,50 @@ MeasureDownloadRate(void) count = 0; t0 = glutGet(GLUT_ELAPSED_TIME) * 0.001; do { + int img = count%NR_TEXOBJ; + GLubyte *img_ptr = texImage + img * image_bytes; + + glBindTexture(GL_TEXTURE_2D, TexObj[img]); + if (SubImage && count > 0) { - glTexSubImage2D(GL_TEXTURE_2D, 0, -TexBorder, -TexBorder, w, h, + /* Only update a portion of the image each iteration. This + * is presumably why you'd want to use texsubimage, otherwise + * you may as well just call teximage again. + * + * A bigger question is whether to use a pointer that moves + * with each call, ie does the incoming data come from L2 + * cache under normal circumstances, or is it pulled from + * uncached memory? + * + * There's a good argument to say L2 cache, ie you'd expect + * the data to have been recently generated. It's possible + * that it could have come from a file read, which may or may + * not have gone through the cpu. + */ + glTexSubImage2D(GL_TEXTURE_2D, 0, + -TexBorder, + -TexBorder + offset * h/8, + w, + h/8, FormatTable[Format].Format, - FormatTable[Format].Type, texImage); + FormatTable[Format].Type, +#if 1 + texImage /* likely in L2$ */ +#else + img_ptr + offset * bytes/8 /* unlikely in L2$ */ +#endif + ); + offset += 1; + offset %= 8; + total += w * h / 8; } else { glTexImage2D(GL_TEXTURE_2D, 0, FormatTable[Format].IntFormat, w, h, TexBorder, FormatTable[Format].Format, - FormatTable[Format].Type, texImage); + FormatTable[Format].Type, + img_ptr); + total += w*h; } /* draw a tiny polygon to force texture into texram */ @@ -192,25 +284,12 @@ MeasureDownloadRate(void) glDisable(GL_TEXTURE_2D); - printf("w*h=%d count=%d time=%f\n", w*h, count, time); - DownloadRate = w * h * count / time; - -#if 0 - if (!ScaleAndBias) { - /* verify texture readback */ - glGetTexImage(GL_TEXTURE_2D, 0, - FormatTable[Format].Format, - FormatTable[Format].Type, getImage); - for (i = 0; i < w * h; i++) { - if (texImage[i] != getImage[i]) { - printf("[%d] %d != %d\n", i, texImage[i], getImage[i]); - } - } - } -#endif + printf("total texels=%f time=%f\n", total, time); + DownloadRate = total / time; + - free(texImage); - free(getImage); + free(orig_texImage); + free(orig_getImage); { GLint err = glGetError(); -- cgit v1.2.3