summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/i915
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2010-11-25 15:41:37 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2011-02-21 13:04:46 +0000
commit: 2c6793fb6bc89df16c23f727bcb072a157ab8d10 (patch)
tree: ef6ab3dfa931a84b8ab84259d45346c2a1dee622 /src/mesa/drivers/dri/i915
parent: 298ebb78de8a6b6edf0aa0fe8d784d00bbc2930e (diff)
i915: Emit a single relocation per vbo
Reducing the number of relocations has lots of nice knock-on effects, not least including reducing batch buffer size, auxiliary array sizes (vmalloced and copied into the kernel), processing of uncached relocations etc.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/mesa/drivers/dri/i915')
-rw-r--r-- src/mesa/drivers/dri/i915/i915_context.h 10
-rw-r--r-- src/mesa/drivers/dri/i915/i915_fragprog.c 4
-rw-r--r-- src/mesa/drivers/dri/i915/i915_reg.h 7
-rw-r--r-- src/mesa/drivers/dri/i915/i915_vtbl.c 3
-rw-r--r-- src/mesa/drivers/dri/i915/intel_tris.c 38
5 files changed, 45 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h
index e38281ee03..601620275f 100644
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -29,7 +29,6 @@
#define I915CONTEXT_INC
#include "intel_context.h"
-#include "i915_reg.h"
#define I915_FALLBACK_TEXTURE 0x1000
#define I915_FALLBACK_COLORMASK 0x2000
@@ -126,6 +125,12 @@ enum {
#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
+#define I915_MAX_TEX_INDIRECT 4
+#define I915_MAX_TEX_INSN 32
+#define I915_MAX_ALU_INSN 64
+#define I915_MAX_DECL_INSN 27
+#define I915_MAX_TEMPORARY 16
+
#define I915_MAX_INSN (I915_MAX_DECL_INSN + \
I915_MAX_TEX_INSN + \
I915_MAX_ALU_INSN)
@@ -264,6 +269,9 @@ struct i915_context
struct i915_fragment_program *current_program;
+ drm_intel_bo *current_vb_bo;
+ unsigned int current_vertex_size;
+
struct i915_hw_state state;
uint32_t last_draw_offset;
GLuint last_sampler;
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 8bc88a8f44..25f4fc3c8b 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -1422,6 +1422,10 @@ i915ValidateFragmentProgram(struct i915_context *i915)
intel->vertex_attr_count,
intel->ViewportMatrix.m, 0);
+ assert(intel->prim.current_offset == intel->prim.start_offset);
+ intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
+ intel->prim.current_offset = intel->prim.start_offset;
+
intel->vertex_size >>= 2;
i915->state.Ctx[I915_CTXREG_LIS2] = s2;
diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h
index 7f31ff674f..766547a4c6 100644
--- a/src/mesa/drivers/dri/i915/i915_reg.h
+++ b/src/mesa/drivers/dri/i915/i915_reg.h
@@ -361,13 +361,6 @@
/* p222 */
-#define I915_MAX_TEX_INDIRECT 4
-#define I915_MAX_TEX_INSN 32
-#define I915_MAX_ALU_INSN 64
-#define I915_MAX_DECL_INSN 27
-#define I915_MAX_TEMPORARY 16
-
-
/* Each instruction is 3 dwords long, though most don't require all
* this space. Maximum of 123 instructions. Smaller maxes per insn
* type.
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index 4049c37fdb..921183b81d 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -678,6 +678,9 @@ i915_new_batch(struct intel_context *intel)
i915->state.emitted = 0;
i915->last_draw_offset = 0;
i915->last_sampler = 0;
+
+ i915->current_vb_bo = NULL;
+ i915->current_vertex_size = 0;
}
static void
diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
index c6b5a01885..cf9291cdfc 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -54,6 +54,7 @@
#include "intel_span.h"
#include "i830_context.h"
#include "i830_reg.h"
+#include "i915_context.h"
static void intelRenderPrimitive(struct gl_context * ctx, GLenum prim);
static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
@@ -215,7 +216,7 @@ void intel_flush_prim(struct intel_context *intel)
offset = intel->prim.start_offset;
intel->prim.start_offset = intel->prim.current_offset;
if (intel->gen < 3)
- intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
+ intel->prim.current_offset = intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
intel->prim.flush = NULL;
intel->vtbl.emit_state(intel);
@@ -240,20 +241,39 @@ void intel_flush_prim(struct intel_context *intel)
#endif
if (intel->gen >= 3) {
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
- assert((offset & ~S0_VB_OFFSET_MASK) == 0);
- OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
- OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
- (intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
+ struct i915_context *i915 = i915_context(&intel->ctx);
+ unsigned int cmd = 0, len = 0;
+
+ if (vb_bo != i915->current_vb_bo) {
+ cmd |= I1_LOAD_S(0);
+ len++;
+ }
+ if (intel->vertex_size != i915->current_vertex_size) {
+ cmd |= I1_LOAD_S(1);
+ len++;
+ }
+ if (len)
+ len++;
+
+ BEGIN_BATCH(2+len);
+ if (cmd)
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | cmd | (len - 2));
+ if (vb_bo != i915->current_vb_bo) {
+ OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
+ i915->current_vb_bo = vb_bo;
+ }
+ if (intel->vertex_size != i915->current_vertex_size) {
+ OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
+ (intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
+ i915->current_vertex_size = intel->vertex_size;
+ }
OUT_BATCH(_3DPRIMITIVE |
PRIM_INDIRECT |
PRIM_INDIRECT_SEQUENTIAL |
intel->prim.primitive |
count);
- OUT_BATCH(0); /* Beginning vertex index */
+ OUT_BATCH(offset / (intel->vertex_size * 4));
ADVANCE_BATCH();
} else {
struct i830_context *i830 = i830_context(&intel->ctx);