diff options
author | Keith Whitwell <keithw@vmware.com> | 2009-01-11 16:19:21 +0000 |
---|---|---|
committer | Keith Whitwell <keithw@vmware.com> | 2009-01-11 16:19:21 +0000 |
commit | e37a3aed95ea91a7ddbabc4bed1fac7c451fe695 (patch) | |
tree | 9efa59fb8015a58146d7a50a3b6039e5fbe3a60d /src/gallium | |
parent | 61e843ff4bf9b9e8c4a7a8a485cee852a4f1dd86 (diff) | |
parent | 83a525af95bbb8012b9d7ee6b766621d6bb2d701 (diff) |
Merge commit 'origin/gallium-0.2' into gallium-xlib-rework
Diffstat (limited to 'src/gallium')
67 files changed, 1586 insertions, 3722 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 7e1df88f0b..a1c4c14445 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -37,6 +37,8 @@ #define DRAW_VBUF_H_ +#include "pipe/p_compiler.h" + struct pipe_rasterizer_state; struct draw_context; diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 8b75136144..d35db57d57 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -199,6 +199,11 @@ draw_create_vs_ppc(struct draw_context *draw, ppc_init_func( &vs->ppc_program ); +#if 0 + ppc_print_code(&vs->ppc_program, TRUE); + ppc_indent(&vs->ppc_program, 8); +#endif + if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, (float (*)[4]) vs->base.immediates, diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index e9015ec2eb..1bb9026205 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -47,6 +48,8 @@ ppc_init_func(struct ppc_function *p) { uint i; + memset(p, 0, sizeof(*p)); + p->num_inst = 0; p->max_inst = 100; /* first guess at buffer size */ p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); @@ -54,6 +57,9 @@ ppc_init_func(struct ppc_function *p) p->fp_used = 0x0; p->vec_used = 0x0; + p->print = FALSE; + p->indent = 0; + /* only allow using gp registers 3..12 for now */ for (i = 0; i < 3; i++) ppc_reserve_register(p, i); @@ -105,6 +111,42 @@ ppc_dump_func(const struct ppc_function *p) } +void +ppc_print_code(struct ppc_function *p, boolean enable) +{ + p->print = enable; +} + + +void +ppc_indent(struct ppc_function *p, int spaces) +{ + p->indent += spaces; +} + + +static void +indent(const struct ppc_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +void +ppc_comment(struct ppc_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("# %s\n", s); + } +} + + /** * Mark a register as being unavailable. */ @@ -132,6 +174,7 @@ ppc_allocate_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC registers!\n"); return -1; } @@ -163,6 +206,7 @@ ppc_allocate_fp_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC FP registers!\n"); return -1; } @@ -194,6 +238,7 @@ ppc_allocate_vec_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC VEC registers!\n"); return -1; } @@ -252,7 +297,8 @@ union vx_inst { }; static INLINE void -emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, + const char *format, boolean transpose) { union vx_inst inst; inst.inst.op = 4; @@ -261,6 +307,13 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vB = vB; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + if (transpose) + printf(format, vD, vB, vA); + else + printf(format, vD, vA, vB); + } } @@ -277,7 +330,8 @@ union vxr_inst { }; static INLINE void -emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, + const char *format) { union vxr_inst inst; inst.inst.op = 4; @@ -287,6 +341,10 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.rC = 0; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vD, vA, vB); + } } @@ -303,7 +361,8 @@ union va_inst { }; static INLINE void -emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC, + const char *format) { union va_inst inst; inst.inst.op = 4; @@ -313,6 +372,10 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) inst.inst.vC = vC; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vD, vA, vB, vC); + } } @@ -396,7 +459,8 @@ union x_inst { }; static INLINE void -emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) +emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2, + const char *format) { union x_inst inst; inst.inst.op = op; @@ -406,6 +470,10 @@ emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) inst.inst.op2 = op2; inst.inst.unused = 0x0; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vrs, ra, rb); + } } @@ -420,7 +488,8 @@ union d_inst { }; static INLINE void -emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) +emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si, + const char *format, boolean transpose) { union d_inst inst; assert(si >= -32768); @@ -430,6 +499,13 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) inst.inst.ra = ra; inst.inst.si = (unsigned) (si & 0xffff); emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + if (transpose) + printf(format, rt, si, ra); + else + printf(format, rt, ra, si); + } } @@ -448,7 +524,7 @@ union a_inst { static INLINE void emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, - uint rc) + uint rc, const char *format) { union a_inst inst; inst.inst.op = op; @@ -459,6 +535,10 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, inst.inst.op2 = op2; inst.inst.rc = rc; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, frt, fra, frb); + } } @@ -477,7 +557,7 @@ union xo_inst { static INLINE void emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, - uint op2, uint rc) + uint op2, uint rc, const char *format) { union xo_inst inst; inst.inst.op = op; @@ -488,6 +568,10 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, inst.inst.op2 = op2; inst.inst.rc = rc; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, rt, ra, rb); + } } @@ -502,140 +586,142 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, void ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 10, vD, vA, vB); + emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE); } /** vector float substract */ void ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 74, vD, vA, vB); + emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE); } /** vector float min */ void ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1098, vD, vA, vB); + emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE); } /** vector float max */ void ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1034, vD, vA, vB); + emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE); } /** vector float mult add: vD = vA * vB + vC */ void ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ + /* note arg order */ + emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n"); } /** vector float negative mult subtract: vD = vA - vB * vC */ void ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 47, vD, vB, vA, vC); /* note arg order */ + /* note arg order */ + emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n"); } /** vector float compare greater than */ void ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 710, vD, vA, vB); + emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u"); } /** vector float compare greater than or equal to */ void ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 454, vD, vA, vB); + emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u"); } /** vector float compare equal */ void ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 198, vD, vA, vB); + emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u"); } /** vector float 2^x */ void ppc_vexptefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 394, vD, 0, vB); + emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float log2(x) */ void ppc_vlogefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 458, vD, 0, vB); + emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float reciprocol */ void ppc_vrefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 266, vD, 0, vB); + emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float reciprocol sqrt estimate */ void ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 330, vD, 0, vB); + emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to negative infinity */ void ppc_vrfim(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 714, vD, 0, vB); + emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to positive infinity */ void ppc_vrfip(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 650, vD, 0, vB); + emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to nearest int */ void ppc_vrfin(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 522, vD, 0, vB); + emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to int toward zero */ void ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 586, vD, 0, vB); + emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE); } -/** vector store: store vR at mem[vA+vB] */ +/** vector store: store vR at mem[rA+rB] */ void -ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB) +ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vR, vA, vB, 231); + emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n"); } -/** vector load: vR = mem[vA+vB] */ +/** vector load: vR = mem[rA+rB] */ void -ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) +ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vR, vA, vB, 103); + emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n"); } /** load vector element word: vR = mem_word[ra+rb] */ void -ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) +ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vr, ra, rb, 71); + emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n"); } @@ -649,49 +735,63 @@ ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) void ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1028, vD, vA, vB); + emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE); } /** vector and complement */ void ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1092, vD, vA, vB); + emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE); } /** vector or */ void ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1156, vD, vA, vB); + emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE); } /** vector nor */ void ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1284, vD, vA, vB); + emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE); } /** vector xor */ void ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1220, vD, vA, vB); + emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE); } /** Pseudo-instruction: vector move */ void ppc_vmove(struct ppc_function *p, uint vD, uint vA) { + boolean print = p->print; + p->print = FALSE; ppc_vor(p, vD, vA, vA); + if (print) { + indent(p); + printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA); + } + p->print = print; } /** Set vector register to {0,0,0,0} */ void ppc_vzero(struct ppc_function *p, uint vr) { + boolean print = p->print; + p->print = FALSE; ppc_vxor(p, vr, vr, vr); + if (print) { + indent(p); + printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr); + } + p->print = print; } @@ -705,35 +805,35 @@ ppc_vzero(struct ppc_function *p, uint vr) void ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 43, vD, vA, vB, vC); + emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u"); } /** vector select */ void ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 42, vD, vA, vB, vC); + emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u"); } /** vector splat byte */ void ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 42, vD, imm, vB); + emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE); } /** vector splat half word */ void ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 588, vD, imm, vB); + emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE); } /** vector splat word */ void ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 652, vD, imm, vB); + emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE); } /** vector splat signed immediate word */ @@ -742,14 +842,14 @@ ppc_vspltisw(struct ppc_function *p, uint vD, int imm) { assert(imm >= -16); assert(imm < 15); - emit_vx(p, 908, vD, imm, 0); + emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE); } /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ void ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 388, vD, vA, vB); + emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE); } @@ -763,63 +863,66 @@ ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) void ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 14, rt, ra, imm); + emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE); } /** rt = ra + (imm << 16) */ void ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 15, rt, ra, imm); + emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE); } /** rt = ra + rb */ void ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_xo(p, 31, rt, ra, rb, 0, 266, 0); + emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n"); } /** rt = ra AND ra */ void ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 28); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra AND imm */ void ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 28, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE); } /** rt = ra OR ra */ void ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 444); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra OR imm */ void ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 24, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE); } /** rt = ra XOR ra */ void ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 316); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra XOR imm */ void ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 26, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE); } /** pseudo instruction: move: rt = ra */ @@ -833,7 +936,14 @@ ppc_mr(struct ppc_function *p, uint rt, uint ra) void ppc_li(struct ppc_function *p, uint rt, int imm) { + boolean print = p->print; + p->print = FALSE; ppc_addi(p, rt, 0, imm); + if (print) { + indent(p); + printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm); + } + p->print = print; } /** rt = imm << 16 */ @@ -864,21 +974,21 @@ ppc_load_int(struct ppc_function *p, uint rt, int imm) void ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d) { - emit_d(p, 37, rs, ra, d); + emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE); } /** store rs at memory[(ra)+d] */ void ppc_stw(struct ppc_function *p, uint rs, uint ra, int d) { - emit_d(p, 36, rs, ra, d); + emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE); } /** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */ void ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) { - emit_d(p, 32, rt, ra, d); + emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE); } @@ -891,42 +1001,42 @@ ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) void ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb) { - emit_a(p, 63, frt, fra, frb, 21, 0); + emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n"); } /** sub: frt = fra - frb */ void ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb) { - emit_a(p, 63, frt, fra, frb, 20, 0); + emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n"); } /** convert to int: rt = (int) ra */ void ppc_fctiwz(struct ppc_function *p, uint rt, uint fra) { - emit_x(p, 63, rt, 0, fra, 15); + emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n"); } /** store frs at mem[(ra)+offset] */ void ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset) { - emit_d(p, 52, frs, ra, offset); + emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE); } /** store frs at mem[(ra)+(rb)] */ void ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb) { - emit_x(p, 31, frs, ra, rb, 983); + emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n"); } /** load frt = mem[(ra)+offset] */ void ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset) { - emit_d(p, 48, frt, ra, offset); + emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE); } @@ -942,6 +1052,10 @@ void ppc_blr(struct ppc_function *p) { emit_i(p, 18, 0, 0, 1); + if (p->print) { + indent(p); + printf("blr\n"); + } } /** Branch Conditional to Link Register (p. 36) */ @@ -949,6 +1063,10 @@ void ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg) { emit_xl(p, 19, condOp, condReg, branchHint, 16, 0); + if (p->print) { + indent(p); + printf("bclr\t%u %u %u\n", condOp, branchHint, condReg); + } } /** Pseudo instruction: return from subroutine */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 08212a2a25..93e5f5187d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -58,6 +59,8 @@ struct ppc_function uint32_t reg_used; /** used/free general-purpose registers bitmask */ uint32_t fp_used; /** used/free floating point registers bitmask */ uint32_t vec_used; /** used/free vector registers bitmask */ + int indent; + boolean print; }; @@ -68,6 +71,10 @@ extern uint ppc_num_instructions(const struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); extern void ppc_dump_func(const struct ppc_function *p); +extern void ppc_print_code(struct ppc_function *p, boolean enable); +extern void ppc_indent(struct ppc_function *p, int spaces); +extern void ppc_comment(struct ppc_function *p, int rel_indent, const char *s); + extern int ppc_reserve_register(struct ppc_function *p, int reg); extern int ppc_allocate_register(struct ppc_function *p); extern void ppc_release_register(struct ppc_function *p, int reg); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index b9a75ae559..071bc2015c 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -443,7 +443,7 @@ void spe_init_func(struct spe_function *p, unsigned code_size) p->regs[i] = 1; } - p->print = false; + p->print = FALSE; p->indent = 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index a92b1902e3..1a4db47501 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -78,15 +78,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { * How many TGSI temps should be implemented with real PPC vector registers * rather than memory. */ -#define MAX_PPC_TEMPS 4 - - -struct reg_chan_vec -{ - struct tgsi_full_src_register src; - uint chan; - uint vec; -}; +#define MAX_PPC_TEMPS 3 /** @@ -158,6 +150,29 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func) /** + * Is the given TGSI register stored as a real PPC vector register? + */ +static boolean +is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) +{ + return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && + reg->SrcRegister.Index < MAX_PPC_TEMPS); +} + + +/** + * Is the given TGSI register stored as a real PPC vector register? + */ +static boolean +is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) +{ + return (reg->DstRegister.File == TGSI_FILE_TEMPORARY && + reg->DstRegister.Index < MAX_PPC_TEMPS); +} + + + +/** * All PPC vector load/store instructions form an effective address * by adding the contents of two registers. For example: * lvx v2,r8,r9 # v2 = memory[r8 + r9] @@ -285,7 +300,7 @@ emit_fetch(struct gen_context *gen, } break; case TGSI_FILE_TEMPORARY: - if (reg->SrcRegister.Index < MAX_PPC_TEMPS) { + if (is_ppc_vec_temporary(reg)) { /* use PPC vec register */ dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; } @@ -353,23 +368,33 @@ emit_fetch(struct gen_context *gen, uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); if (sign_op != TGSI_UTIL_SIGN_KEEP) { int bit31_vec = gen_get_bit31_vec(gen); + int dst_vec2; + + if (is_ppc_vec_temporary(reg)) { + /* need to use a new temp */ + dst_vec2 = ppc_allocate_vec_register(gen->f); + } + else { + dst_vec2 = dst_vec; + } switch (sign_op) { case TGSI_UTIL_SIGN_CLEAR: /* vec = vec & ~bit31 */ - ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec); + ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec); break; case TGSI_UTIL_SIGN_SET: /* vec = vec | bit31 */ - ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec); + ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec); break; case TGSI_UTIL_SIGN_TOGGLE: /* vec = vec ^ bit31 */ - ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec); + ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec); break; default: assert(0); } + return dst_vec2; } } @@ -452,8 +477,7 @@ release_src_vecs(struct gen_context *gen) uint i; for (i = 0; i < gen->num_regs; i++) { const const struct tgsi_full_src_register src = gen->regs[i].src; - if (!(src.SrcRegister.File == TGSI_FILE_TEMPORARY && - src.SrcRegister.Index < MAX_PPC_TEMPS)) { + if (!is_ppc_vec_temporary(&src)) { ppc_release_vec_register(gen->f, gen->regs[i].vec); } } @@ -469,8 +493,7 @@ get_dst_vec(struct gen_context *gen, { const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Index < MAX_PPC_TEMPS) { + if (is_ppc_vec_temporary_dst(reg)) { int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; return vec; } @@ -502,7 +525,7 @@ emit_store(struct gen_context *gen, } break; case TGSI_FILE_TEMPORARY: - if (reg->DstRegister.Index < MAX_PPC_TEMPS) { + if (is_ppc_vec_temporary_dst(reg)) { if (!free_vec) { int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; if (dst_vec != src_vec) @@ -584,6 +607,7 @@ static void emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ int v1 = get_dst_vec(gen, inst, chan_index); @@ -770,7 +794,7 @@ emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) v2 = ppc_allocate_vec_register(gen->f); - ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */ v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */ @@ -815,7 +839,7 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) ppc_vzero(f, zero_vec); ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ - ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ + ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ ppc_release_vec_register(f, t_vec); @@ -1221,9 +1245,12 @@ emit_prologue(struct ppc_function *func) static void emit_epilogue(struct ppc_function *func) { + ppc_comment(func, -4, "Epilogue:"); ppc_return(func); /* XXX restore prev stack frame */ +#if 0 debug_printf("PPC: Emitted %u instructions\n", func->num_inst); +#endif } @@ -1248,6 +1275,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, unsigned ok = 1; uint num_immediates = 0; struct gen_context gen; + uint ic = 0; if (use_ppc_asm < 0) { /* If GALLIUM_NOPPC is set, don't use PPC codegen */ @@ -1280,6 +1308,12 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, break; case TGSI_TOKEN_TYPE_INSTRUCTION: + if (func->print) { + _debug_printf("# "); + ic++; + tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); + } + ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); if (!ok) { diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 12d7ef9a37..c92f8e5cba 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -54,6 +54,9 @@ INCLUDE_DIRS = \ $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ +.c.s: + $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + default: $(CELL_LIB) diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 8f3deb482e..5a889a6119 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -145,7 +145,7 @@ get_const_one_reg(struct codegen *gen) gen->one_reg = spe_allocate_available_register(gen->f); spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "INIT CONSTANT 1.0:"); + spe_comment(gen->f, -4, "init constant reg = 1.0:"); /* one = {1.0, 1.0, 1.0, 1.0} */ spe_load_float(gen->f, gen->one_reg, 1.0f); @@ -168,7 +168,7 @@ get_address_reg(struct codegen *gen) gen->addr_reg = spe_allocate_available_register(gen->f); spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "INIT CONSTANT 1.0:"); + spe_comment(gen->f, -4, "init address reg = 0:"); /* init addr = {0, 0, 0, 0} */ spe_zero(gen->f, gen->addr_reg); @@ -479,7 +479,7 @@ emit_prologue(struct codegen *gen) { gen->frame_size = 1024; /* XXX temporary, should be dynamic */ - spe_comment(gen->f, -4, "Function prologue:"); + spe_comment(gen->f, 0, "Function prologue:"); /* save $lr on stack # stqd $lr,16($sp) */ spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); @@ -515,7 +515,7 @@ emit_epilogue(struct codegen *gen) { const int return_reg = 3; - spe_comment(gen->f, -4, "Function epilogue:"); + spe_comment(gen->f, 0, "Function epilogue:"); spe_comment(gen->f, 0, "return the killed mask"); if (gen->kill_mask_reg > 0) { @@ -561,8 +561,6 @@ emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch = 0, src_reg, addr_reg; - spe_comment(gen->f, -4, "ARL:"); - src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); addr_reg = get_address_reg(gen); @@ -580,8 +578,6 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, src_reg[4], dst_reg[4]; - spe_comment(gen->f, -4, "MOV:"); - FOR_EACH_ENABLED_CHANNEL(inst, ch) { src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); @@ -612,20 +608,6 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], d_reg[4]; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - spe_comment(gen->f, -4, "ADD:"); - break; - case TGSI_OPCODE_SUB: - spe_comment(gen->f, -4, "SUB:"); - break; - case TGSI_OPCODE_MUL: - spe_comment(gen->f, -4, "MUL:"); - break; - default: - assert(0); - } - /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -670,7 +652,7 @@ static boolean emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; - spe_comment(gen->f, -4, "MAD:"); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); @@ -695,7 +677,7 @@ static boolean emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; - spe_comment(gen->f, -4, "LERP:"); + /* setup/get src/dst/temp regs */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -730,14 +712,6 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], d_reg[4], tmp_reg[4]; - if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { - spe_comment(gen->f, -4, "RCP:"); - } - else { - assert(inst->Instruction.Opcode == TGSI_OPCODE_RSQ); - spe_comment(gen->f, -4, "RSQ:"); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); @@ -778,8 +752,6 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], d_reg[4]; const int bit31mask_reg = get_itemp(gen); - spe_comment(gen->f, -4, "ABS:"); - /* mask with bit 31 set, the rest cleared */ spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); @@ -812,8 +784,6 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) int s2x_reg, s2y_reg, s2z_reg; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - spe_comment(gen->f, -4, "DP3:"); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); @@ -854,8 +824,6 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1x_reg, s1y_reg, s1z_reg, s1w_reg; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - spe_comment(gen->f, -4, "DP4:"); - s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); @@ -898,8 +866,6 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) { /* XXX rewrite this function to look more like DP3/DP4 */ int ch; - spe_comment(gen->f, -4, "DPH:"); - int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); int tmp_reg = get_itemp(gen); @@ -941,8 +907,6 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) int src_reg[3]; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - spe_comment(gen->f, -4, "NRM3:"); - src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); @@ -981,8 +945,6 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) { - spe_comment(gen->f, -4, "XPD:"); - int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); int tmp_reg = get_itemp(gen); @@ -1044,32 +1006,6 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; bool complement = FALSE; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_SGT: - spe_comment(gen->f, -4, "SGT:"); - break; - case TGSI_OPCODE_SLT: - spe_comment(gen->f, -4, "SLT:"); - break; - case TGSI_OPCODE_SGE: - spe_comment(gen->f, -4, "SGE:"); - complement = TRUE; - break; - case TGSI_OPCODE_SLE: - spe_comment(gen->f, -4, "SLE:"); - complement = TRUE; - break; - case TGSI_OPCODE_SEQ: - spe_comment(gen->f, -4, "SEQ:"); - break; - case TGSI_OPCODE_SNE: - spe_comment(gen->f, -4, "SNE:"); - complement = TRUE; - break; - default: - ; - } - one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { @@ -1088,15 +1024,18 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) break; case TGSI_OPCODE_SGE: spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); + complement = TRUE; break; case TGSI_OPCODE_SLE: spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + complement = TRUE; break; case TGSI_OPCODE_SEQ: spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); break; case TGSI_OPCODE_SNE: spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + complement = TRUE; break; default: assert(0); @@ -1129,8 +1068,6 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; - spe_comment(gen->f, -4, "CMP:"); - FOR_EACH_ENABLED_CHANNEL(inst, ch) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); @@ -1161,8 +1098,6 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], d_reg[4]; - spe_comment(gen->f, -4, "TRUNC:"); - FOR_EACH_ENABLED_CHANNEL(inst, ch) { s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); @@ -1198,8 +1133,6 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; - spe_comment(gen->f, -4, "FLR:"); - zero_reg = get_itemp(gen); spe_zero(gen->f, zero_reg); one_reg = get_const_one_reg(gen); @@ -1248,8 +1181,6 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; - spe_comment(gen->f, -4, "FRC:"); - zero_reg = get_itemp(gen); spe_zero(gen->f, zero_reg); one_reg = get_const_one_reg(gen); @@ -1577,8 +1508,6 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; - spe_comment(gen->f, -4, "MAX:"); - FOR_EACH_ENABLED_CHANNEL(inst, ch) { s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); @@ -1646,8 +1575,6 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) const int channel = 0; int cond_reg; - spe_comment(gen->f, -4, "IF:"); - cond_reg = get_cond_mask_reg(gen); /* XXX push cond exec mask */ @@ -1682,8 +1609,6 @@ emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) { const int cond_reg = get_cond_mask_reg(gen); - spe_comment(gen->f, -4, "ELSE:"); - spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); spe_complement(gen->f, cond_reg, cond_reg); emit_update_exec_mask(gen); @@ -1695,8 +1620,6 @@ emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) { - spe_comment(gen->f, -4, "ENDIF:"); - /* XXX todo: pop cond exec mask */ gen->if_nesting--; @@ -1712,8 +1635,6 @@ emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) { int exec_reg, loop_reg; - spe_comment(gen->f, -4, "BGNLOOP:"); - exec_reg = get_exec_mask_reg(gen); loop_reg = get_loop_mask_reg(gen); @@ -1736,8 +1657,6 @@ emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) const int tmp_reg = get_itemp(gen); int offset; - spe_comment(gen->f, -4, "ENDLOOP:"); - /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ spe_orx(gen->f, tmp_reg, loop_reg); @@ -1762,8 +1681,6 @@ emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) const int exec_reg = get_exec_mask_reg(gen); const int loop_reg = get_loop_mask_reg(gen); - spe_comment(gen->f, -4, "BREAK:"); - assert(gen->loop_nesting > 0); spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask"); @@ -1778,8 +1695,6 @@ emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst) { - spe_comment(gen->f, -4, "CONT:"); - assert(gen->loop_nesting > 0); return TRUE; @@ -1792,8 +1707,6 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, { int ch; - spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:"); - FOR_EACH_ENABLED_CHANNEL(inst, ch) { int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); @@ -1829,7 +1742,6 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, static boolean emit_END(struct codegen *gen) { - spe_comment(gen->f, -4, "END:"); emit_epilogue(gen); return TRUE; } @@ -1962,8 +1874,6 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) assert(gen->num_imm < MAX_TEMPS); - spe_comment(gen->f, -4, "IMMEDIATE:"); - for (ch = 0; ch < 4; ch++) { float val = immed->u.ImmediateFloat32[ch].Float; @@ -2028,7 +1938,7 @@ emit_declaration(struct cell_context *cell, sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, gen->temp_regs[i][0], gen->temp_regs[i][1], gen->temp_regs[i][2], gen->temp_regs[i][3]); - spe_comment(gen->f, -4, buf); + spe_comment(gen->f, 0, buf); } } break; @@ -2056,6 +1966,7 @@ cell_gen_fragment_program(struct cell_context *cell, { struct tgsi_parse_context parse; struct codegen gen; + uint ic = 0; memset(&gen, 0, sizeof(gen)); gen.cell = cell; @@ -2073,7 +1984,7 @@ cell_gen_fragment_program(struct cell_context *cell, if (cell->debug_flags & CELL_DEBUG_ASM) { spe_print_code(f, TRUE); - spe_indent(f, 8); + spe_indent(f, 2*8); printf("Begin %s\n", __FUNCTION__); tgsi_dump(tokens, 0); } @@ -2087,16 +1998,29 @@ cell_gen_fragment_program(struct cell_context *cell, switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: + if (f->print) { + _debug_printf(" # "); + tgsi_dump_immediate(&parse.FullToken.FullImmediate); + } if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) gen.error = TRUE; break; case TGSI_TOKEN_TYPE_DECLARATION: + if (f->print) { + _debug_printf(" # "); + tgsi_dump_declaration(&parse.FullToken.FullDeclaration); + } if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) gen.error = TRUE; break; case TGSI_TOKEN_TYPE_INSTRUCTION: + if (f->print) { + _debug_printf(" # "); + ic++; + tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); + } if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) gen.error = TRUE; break; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index aa670b9a45..37d297cc0f 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -21,7 +21,7 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) } else { nr_faces = 1; } - + pitch = pt->width[0]; for (l = 0; l <= pt->last_level; l++) { pt->width[l] = width; @@ -76,13 +76,15 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | PIPE_TEXTURE_USAGE_DISPLAY_TARGET)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else { switch (pt->format) { /* TODO: Figure out which formats can be swizzled */ case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: - /* XXX: Re-enable when SIFM size limits are fixed */ - /*case PIPE_FORMAT_R16_SNORM:*/ + case PIPE_FORMAT_R16_SNORM: break; default: mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; @@ -192,4 +194,3 @@ nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) pscreen->get_tex_surface = nv30_miptree_surface_new; pscreen->tex_surface_release = nv30_miptree_surface_del; } - diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index d754892299..29356e8c1e 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -98,7 +98,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z16_UNORM: return TRUE; @@ -110,7 +110,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: @@ -139,7 +139,8 @@ nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, if (!mt->shadow_tex) { unsigned old_tex_usage = surface->texture->tex_usage; - surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR; + surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | + PIPE_TEXTURE_USAGE_DYNAMIC; mt->shadow_tex = screen->texture_create(screen, surface->texture); surface->texture->tex_usage = old_tex_usage; @@ -326,7 +327,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 3); so_method(so, screen->rankine, 0x1450, 1); so_data (so, 0x00030004); - + /* NEW */ so_method(so, screen->rankine, 0x1e98, 1); so_data (so, 0); @@ -382,4 +383,3 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) return &screen->pipe; } - diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index b68967c07f..00ce6be985 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -57,6 +57,8 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { struct pipe_winsys *ws = pscreen->winsys; struct nv40_miptree *mt; + unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE; mt = MALLOC(sizeof(struct nv40_miptree)); if (!mt) @@ -75,25 +77,27 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | PIPE_TEXTURE_USAGE_DISPLAY_TARGET)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else { switch (pt->format) { /* TODO: Figure out which formats can be swizzled */ case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: - /* XXX: Re-enable when SIFM size limits are fixed */ - /*case PIPE_FORMAT_R16_SNORM:*/ + case PIPE_FORMAT_R16_SNORM: break; default: mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; } } + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + nv40_miptree_layout(mt); - mt->buffer = ws->buffer_create(ws, 256, - PIPE_BUFFER_USAGE_PIXEL | - NOUVEAU_BUFFER_USAGE_TEXTURE, - mt->total_size); + mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); if (!mt->buffer) { FREE(mt); return NULL; diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 995148e03f..9657a19c50 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -148,7 +148,8 @@ nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, if (!mt->shadow_tex) { unsigned old_tex_usage = surface->texture->tex_usage; - surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR; + surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | + PIPE_TEXTURE_USAGE_DYNAMIC; mt->shadow_tex = screen->texture_create(screen, surface->texture); surface->texture->tex_usage = old_tex_usage; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 6bfac581f9..5c6a92b53b 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -171,6 +171,7 @@ enum pipe_texture_target { #define PIPE_TEXTURE_USAGE_PRIMARY 0x4 /* ie a frontbuffer */ #define PIPE_TEXTURE_USAGE_DEPTH_STENCIL 0x8 #define PIPE_TEXTURE_USAGE_SAMPLER 0x10 +#define PIPE_TEXTURE_USAGE_DYNAMIC 0x20 /** Pipe driver custom usage flags should be greater or equal to this value */ #define PIPE_TEXTURE_USAGE_CUSTOM (1 << 16) diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 4f7a953484..cddfca54fe 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,9 +1,12 @@ TARGET = libg3dvl.a OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \ - vl_r16snorm_mc.o vl_r16snorm_mc_buf.o + vl_r16snorm_mc_buf.o GALLIUMDIR = ../.. -CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl +CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/auxiliary \ + -I${GALLIUMDIR}/winsys/g3dvl \ ############################################# diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index e3b3d03256..3ce93cf49d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -1,13 +1,13 @@ #define VL_INTERNAL #include "vl_basic_csc.h" #include <assert.h> -#include <stdlib.h> #include <pipe/p_context.h> #include <pipe/p_winsys.h> #include <pipe/p_state.h> #include <pipe/p_inlines.h> #include <tgsi/tgsi_parse.h> #include <tgsi/tgsi_build.h> +#include <util/u_memory.h> #include "vl_csc.h" #include "vl_surface.h" #include "vl_shader_build.h" @@ -237,7 +237,7 @@ static int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vs_const_buf.buffer); pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->fs_const_buf.buffer); - free(basic_csc); + FREE(basic_csc); return 0; } @@ -369,7 +369,7 @@ static int vlCreateVertexShader assert(context); pipe = csc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); @@ -430,7 +430,7 @@ static int vlCreateVertexShader vs.tokens = tokens; csc->vertex_shader = pipe->create_vs_state(pipe, &vs); - free(tokens); + FREE(tokens); return 0; } @@ -456,7 +456,7 @@ static int vlCreateFragmentShader assert(context); pipe = csc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); @@ -517,7 +517,7 @@ static int vlCreateFragmentShader fs.tokens = tokens; csc->fragment_shader = pipe->create_fs_state(pipe, &fs); - free(tokens); + FREE(tokens); return 0; } @@ -626,7 +626,7 @@ static int vlCreateDataBufs memcpy ( pipe->winsys->buffer_map(pipe->winsys, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &bt_601, + &bt_601_full, sizeof(struct vlFragmentShaderConsts) ); @@ -691,7 +691,7 @@ int vlCreateBasicCSC assert(pipe); assert(csc); - basic_csc = calloc(1, sizeof(struct vlBasicCSC)); + basic_csc = CALLOC_STRUCT(vlBasicCSC); if (!basic_csc) return 1; diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index fe107e406d..fbea1363d8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -1,11 +1,10 @@ #define VL_INTERNAL #include "vl_context.h" #include <assert.h> -#include <stdlib.h> #include <pipe/p_context.h> #include <pipe/p_state.h> +#include <util/u_memory.h> #include "vl_render.h" -#include "vl_r16snorm_mc.h" #include "vl_r16snorm_mc_buf.h" #include "vl_csc.h" #include "vl_basic_csc.h" @@ -112,7 +111,7 @@ int vlCreateContext assert(context); assert(pipe); - ctx = calloc(1, sizeof(struct vlContext)); + ctx = CALLOC_STRUCT(vlContext); if (!ctx) return 1; @@ -127,7 +126,6 @@ int vlCreateContext vlInitCommon(ctx); - /*vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render);*/ vlCreateR16SNormBufferedMC(pipe, picture_width, picture_height, picture_format, &ctx->render); vlCreateBasicCSC(pipe, &ctx->csc); @@ -154,7 +152,7 @@ int vlDestroyContext context->pipe->delete_rasterizer_state(context->pipe, context->raster); context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->dsa); - free(context); + FREE(context); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_display.c b/src/gallium/state_trackers/g3dvl/vl_display.c index af80faa7f5..dce06de758 100644 --- a/src/gallium/state_trackers/g3dvl/vl_display.c +++ b/src/gallium/state_trackers/g3dvl/vl_display.c @@ -1,7 +1,7 @@ #define VL_INTERNAL #include "vl_display.h" #include <assert.h> -#include <stdlib.h> +#include <util/u_memory.h> int vlCreateDisplay ( @@ -14,7 +14,7 @@ int vlCreateDisplay assert(native_display); assert(display); - dpy = calloc(1, sizeof(struct vlDisplay)); + dpy = CALLOC_STRUCT(vlDisplay); if (!dpy) return 1; @@ -32,7 +32,7 @@ int vlDestroyDisplay { assert(display); - free(display); + FREE(display); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c deleted file mode 100644 index 3272220ef8..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c +++ /dev/null @@ -1,2344 +0,0 @@ -#define VL_INTERNAL -#include "vl_r16snorm_mc.h" -#include <assert.h> -#include <stdlib.h> -#include <pipe/p_context.h> -#include <pipe/p_winsys.h> -#include <pipe/p_state.h> -#include <pipe/p_inlines.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_build.h> -#include "vl_render.h" -#include "vl_shader_build.h" -#include "vl_surface.h" -#include "vl_util.h" -#include "vl_types.h" -#include "vl_defs.h" - -#define NUM_BUFS 4 /* Number of rotating buffers to use */ - -struct vlVertexShaderConsts -{ - /*struct vlVertex4f scale; - struct vlVertex4f denorm;*/ - struct vlVertex4f scale; - struct vlVertex4f mb_pos_trans; - struct vlVertex4f denorm; - struct - { - struct vlVertex4f top_field; - struct vlVertex4f bottom_field; - } mb_tc_trans[2]; -}; - -struct vlFragmentShaderConsts -{ - struct vlVertex4f multiplier; - struct vlVertex4f div; -}; - -struct vlR16SnormMC -{ - struct vlRender base; - - unsigned int video_width, video_height; - enum vlFormat video_format; - unsigned int cur_buf; - - struct pipe_context *pipe; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state render_target; - struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[NUM_BUFS][5]; - void *i_vs, *p_vs[2], *b_vs[2]; - void *i_fs, *p_fs[2], *b_fs[2]; - struct pipe_vertex_buffer vertex_bufs[3]; - struct pipe_vertex_element vertex_elems[3]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; -}; - -static int vlBegin -( - struct vlRender *render -) -{ - struct vlR16SnormMC *mc; - struct pipe_context *pipe; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - pipe = mc->pipe; - - /* Frame buffer set in vlRender*Macroblock() */ - /* Shaders, samplers, textures set in vlRender*Macroblock() */ - pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs); - pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); - pipe->set_viewport_state(pipe, &mc->viewport); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf); - - return 0; -} - -/*static int vlGrabMacroBlock -( - struct vlR16SnormMC *mc, - struct vlMpeg2MacroBlock *macroblock -) -{ - assert(mc); - assert(macroblock); - - - - return 0; -}*/ - -/*#define DO_IDCT*/ - -#ifdef DO_IDCT -static int vlTransformBlock(short *src, short *dst, short bias) -{ - static const float basis[8][8] = - { - {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, - {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, - {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, - {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, - {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, - {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, - {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, - {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} - }; - - unsigned int x, y; - short tmp[64]; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - tmp[y * VL_BLOCK_WIDTH + x] = (short) - ( - src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + - src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + - src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + - src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + - src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + - src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + - src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + - src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] - ); - - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - { - dst[y * VL_BLOCK_WIDTH + x] = bias + (short) - ( - tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + - tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + - tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + - tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + - tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + - tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + - tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + - tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] - ); - if (dst[y * VL_BLOCK_WIDTH + x] > 255) - dst[y * VL_BLOCK_WIDTH + x] = 255; - else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) - dst[y * VL_BLOCK_WIDTH + x] = 0; - } - return 0; -} -#endif - -static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - dst += VL_BLOCK_HEIGHT * dst_pitch; - - for (; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memset - ( - dst + y * dst_pitch, - 0, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -enum vlSampleType -{ - vlSampleTypeFull, - vlSampleTypeDiff -}; - -static int vlGrabBlocks -( - struct vlR16SnormMC *mc, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - enum vlSampleType sample_type, - short *blocks -) -{ - struct pipe_surface *tex_surface; - short *texels; - unsigned int tex_pitch; - unsigned int tb, sb = 0; - - assert(mc); - assert(blocks); - - tex_surface = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUFS][0], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - for (tb = 0; tb < 4; ++tb) - { - if ((coded_block_pattern >> (5 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0); -#endif - - if (dct_type == vlDCTTypeFrameCoded) - vlGrabFrameCodedBlock - ( - cur_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - else - vlGrabFieldCodedBlock - ( - cur_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); - } - - pipe_surface_unmap(tex_surface); - - /* TODO: Implement 422, 444 */ - for (tb = 0; tb < 2; ++tb) - { - tex_surface = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUFS][tb + 1], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - if ((coded_block_pattern >> (1 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0); -#endif - - vlGrabFrameCodedBlock - ( - cur_block, - texels, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels, tex_pitch); - - pipe_surface_unmap(tex_surface); - } - - return 0; -} - -static int vlRenderIMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(blocks); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != vlPictureTypeFrame) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeFull, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); - pipe->bind_vs_state(pipe, mc->i_vs); - pipe->bind_fs_state(pipe, mc->i_fs); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - mc->cur_buf++; - - return 0; -} - -static int vlRenderPMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - enum vlMotionType mc_type, - short top_x, - short top_y, - short bottom_x, - short bottom_y, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *ref_surface, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != vlPictureTypeFrame) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - - if (mc_type == vlMotionTypeField) - { - vs_consts->denorm.x = (float)surface->texture->width[0]; - vs_consts->denorm.y = (float)surface->texture->height[0]; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, mc->p_vs[1]); - pipe->bind_fs_state(pipe, mc->p_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, mc->p_vs[0]); - pipe->bind_fs_state(pipe, mc->p_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - - mc->textures[mc->cur_buf % NUM_BUFS][3] = ref_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - mc->cur_buf++; - - return 0; -} - -static int vlRenderBMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - enum vlMotionType mc_type, - short top_past_x, - short top_past_y, - short bottom_past_x, - short bottom_past_y, - short top_future_x, - short top_future_y, - short bottom_future_x, - short bottom_future_y, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *past_surface, - struct vlSurface *future_surface, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != vlPictureTypeFrame) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_past_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_past_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_future_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_future_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[1].top_field.z = 0.0f; - vs_consts->mb_tc_trans[1].top_field.w = 0.0f; - - if (mc_type == vlMotionTypeField) - { - vs_consts->denorm.x = (float)surface->texture->width[0]; - vs_consts->denorm.y = (float)surface->texture->height[0]; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_past_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_past_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_future_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_future_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, mc->b_vs[1]); - pipe->bind_fs_state(pipe, mc->b_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, mc->b_vs[0]); - pipe->bind_fs_state(pipe, mc->b_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - - mc->textures[mc->cur_buf % NUM_BUFS][3] = past_surface->texture; - mc->textures[mc->cur_buf % NUM_BUFS][4] = future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - mc->cur_buf++; - - return 0; -} - -static int vlRenderMacroBlocksMpeg2R16Snorm -( - struct vlRender *render, - struct vlMpeg2MacroBlockBatch *batch, - struct vlSurface *surface -) -{ - struct vlR16SnormMC *mc; - unsigned int i; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - - /*for (i = 0; i < batch->num_macroblocks; ++i) - vlGrabMacroBlock(batch->macroblocks[i]);*/ - - for (i = 0; i < batch->num_macroblocks; ++i) - { - switch (batch->macroblocks[i].mb_type) - { - case vlMacroBlockTypeIntra: - { - vlRenderIMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - surface - ); - break; - } - case vlMacroBlockTypeFwdPredicted: - { - vlRenderPMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][0][0], - batch->macroblocks[i].PMV[0][0][1], - batch->macroblocks[i].PMV[1][0][0], - batch->macroblocks[i].PMV[1][0][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->past_surface, - surface - ); - break; - } - case vlMacroBlockTypeBkwdPredicted: - { - vlRenderPMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][1][0], - batch->macroblocks[i].PMV[0][1][1], - batch->macroblocks[i].PMV[1][1][0], - batch->macroblocks[i].PMV[1][1][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->future_surface, - surface - ); - break; - } - case vlMacroBlockTypeBiPredicted: - { - vlRenderBMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][0][0], - batch->macroblocks[i].PMV[0][0][1], - batch->macroblocks[i].PMV[1][0][0], - batch->macroblocks[i].PMV[1][0][1], - batch->macroblocks[i].PMV[0][1][0], - batch->macroblocks[i].PMV[0][1][1], - batch->macroblocks[i].PMV[1][1][0], - batch->macroblocks[i].PMV[1][1][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->past_surface, - batch->future_surface, - surface - ); - break; - } - default: - assert(0); - } - } - - return 0; -} - -static int vlEnd -( - struct vlRender *render -) -{ - assert(render); - - return 0; -} - -static int vlFlush -( - struct vlRender *render -) -{ - assert(render); - - return 0; -} - -static int vlDestroy -( - struct vlRender *render -) -{ - struct vlR16SnormMC *mc; - struct pipe_context *pipe; - unsigned int i; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - pipe = mc->pipe; - - for (i = 0; i < 5; ++i) - pipe->delete_sampler_state(pipe, mc->samplers[i]); - - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[i].buffer); - - /* Textures 3 & 4 are not created directly, no need to release them here */ - for (i = 0; i < NUM_BUFS; ++i) - { - pipe_texture_release(&mc->textures[i][0]); - pipe_texture_release(&mc->textures[i][1]); - pipe_texture_release(&mc->textures[i][2]); - } - - pipe->delete_vs_state(pipe, mc->i_vs); - pipe->delete_fs_state(pipe, mc->i_fs); - - for (i = 0; i < 2; ++i) - { - pipe->delete_vs_state(pipe, mc->p_vs[i]); - pipe->delete_fs_state(pipe, mc->p_fs[i]); - pipe->delete_vs_state(pipe, mc->b_vs[i]); - pipe->delete_fs_state(pipe, mc->b_fs[i]); - } - - pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); - pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); - - free(mc); - - return 0; -} - -/* - * Represents 8 triangles (4 quads, 1 per block) in noormalized coords - * that render a macroblock. - * Need to be scaled to cover mbW*mbH macroblock pixels and translated into - * position on target surface. - */ -static const struct vlVertex2f macroblock_verts[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, - {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, - - {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, - {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, - - {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 4 luma blocks arranged - * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. - */ -static const struct vlVertex2f macroblock_luma_texcoords[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, - - {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, - {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, - - {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, - {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 1 chroma block. - * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. - */ -static const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 2 chroma blocks arranged - * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. - * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. - */ -static const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 4 chroma blocks. - * Same case as 4 luma blocks. - */ -static const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; - -/* - * Used when rendering P and B macroblocks, multiplier is applied to the A channel, - * which is then added to the L channel, then the bias is subtracted from that to - * get back the differential. The differential is then added to the samples from the - * reference surface(s). - */ -static const struct vlFragmentShaderConsts fs_consts = -{ - {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, - {0.5f, 2.0f, 0.0f, 0.0f} -}; - -static int vlCreateVertexShaderIMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderIMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - */ - for (i = 0; i < 2; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul o0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFramePMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Ref macroblock texcoords - */ - for (i = 0; i < 4; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldPMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field ref macroblock texcoords - * decl o4 ; Bottom field ref macroblock texcoords - * decl o5 ; Denormalized vertex pos - */ - for (i = 0; i < 6; i++) - { - decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - mov o1, i1 ; Move input luma texcoords to output - mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock - add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o5, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFramePMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldPMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Denormalized vertex pos - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t4 */ - decl = vl_decl_temps(0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* XXX: Pos values off by 0.5? */ - /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFrameBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move past ref macroblock texcoords into position - * decl c4 ; Unused - * decl c5 ; Translation vector to move future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Past ref macroblock texcoords - * decl o4 ; Future ref macroblock texcoords - */ - for (i = 0; i < 5; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on past ref macroblock - add o4, t0, c5 ; Translate rect into position on future ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position - * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position - * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field past ref macroblock texcoords - * decl o4 ; Bottom field past ref macroblock texcoords - * decl o5 ; Top field future ref macroblock texcoords - * decl o6 ; Bottom field future ref macroblock texcoords - * decl o7 ; Denormalized vertex pos - */ - for (i = 0; i < 8; i++) - { - decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock - * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock - * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock - * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o7, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFrameBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s4 - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t2 */ - decl = vl_decl_temps(0, 2); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock - * tex2d t2, i3, s4 ; Read texel from future ref macroblock - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Texcoords for s4 - * decl i5 ; Texcoords for s4 - * decl i6 ; Denormalized vertex pos - */ - for (i = 0; i < 7; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels - * ; and for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t5 */ - decl = vl_decl_temps(0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* XXX: Pos values off by 0.5? */ - /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field - * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateDataBufs -( - struct vlR16SnormMC *mc -) -{ - struct pipe_context *pipe; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - - /* Create our vertex buffer and vertex buffer element */ - mc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); - mc->vertex_bufs[0].max_index = 23; - mc->vertex_bufs[0].buffer_offset = 0; - mc->vertex_bufs[0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 24 - ); - - mc->vertex_elems[0].src_offset = 0; - mc->vertex_elems[0].vertex_buffer_index = 0; - mc->vertex_elems[0].nr_components = 2; - mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Create our texcoord buffers and texcoord buffer elements */ - for (i = 1; i < 3; ++i) - { - mc->vertex_bufs[i].pitch = sizeof(struct vlVertex2f); - mc->vertex_bufs[i].max_index = 23; - mc->vertex_bufs[i].buffer_offset = 0; - mc->vertex_bufs[i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 24 - ); - - mc->vertex_elems[i].src_offset = 0; - mc->vertex_elems[i].vertex_buffer_index = i; - mc->vertex_elems[i].nr_components = 2; - mc->vertex_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; - } - - /* Fill buffers */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_verts, - sizeof(struct vlVertex2f) * 24 - ); - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_luma_texcoords, - sizeof(struct vlVertex2f) * 24 - ); - /* TODO: Accomodate 422, 444 */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_chroma_420_texcoords, - sizeof(struct vlVertex2f) * 24 - ); - - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_unmap(pipe->winsys, mc->vertex_bufs[i].buffer); - - /* Create our constant buffer */ - mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); - mc->vs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - mc->vs_const_buf.size - ); - - mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); - mc->fs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - mc->fs_const_buf.size - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &fs_consts, - sizeof(struct vlFragmentShaderConsts) - ); - - pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); - - return 0; -} - -static int vlInit -( - struct vlR16SnormMC *mc -) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - struct pipe_texture template; - unsigned int filters[5]; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - - /* For MC we render to textures, which are rounded up to nearest POT */ - mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width); - mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height); - mc->viewport.scale[2] = 1; - mc->viewport.scale[3] = 1; - mc->viewport.translate[0] = 0; - mc->viewport.translate[1] = 0; - mc->viewport.translate[2] = 0; - mc->viewport.translate[3] = 0; - - mc->render_target.width = vlRoundUpPOT(mc->video_width); - mc->render_target.height = vlRoundUpPOT(mc->video_height); - mc->render_target.num_cbufs = 1; - /* FB for MC stage is a vlSurface, set in vlSetRenderSurface() */ - mc->render_target.zsbuf = NULL; - - filters[0] = PIPE_TEX_FILTER_NEAREST; - filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[2] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[3] = PIPE_TEX_FILTER_LINEAR; - filters[4] = PIPE_TEX_FILTER_LINEAR; - - for (i = 0; i < 5; ++i) - { - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = filters[i]; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = filters[i]; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - sampler.min_lod = 0; - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler); - } - - memset(&template, 0, sizeof(struct pipe_texture)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_R16_SNORM; - template.last_level = 0; - template.width[0] = 8; - template.height[0] = 8 * 4; - template.depth[0] = 1; - template.compressed = 0; - pf_get_block(template.format, &template.block); - - for (i = 0; i < NUM_BUFS; ++i) - mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); - - if (mc->video_format == vlFormatYCbCr420) - template.height[0] = 8; - else if (mc->video_format == vlFormatYCbCr422) - template.height[0] = 8 * 2; - else if (mc->video_format == vlFormatYCbCr444) - template.height[0] = 8 * 4; - else - assert(0); - - for (i = 0; i < NUM_BUFS; ++i) - { - mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); - mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); - } - - /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ - - vlCreateVertexShaderIMB(mc); - vlCreateFragmentShaderIMB(mc); - vlCreateVertexShaderFramePMB(mc); - vlCreateVertexShaderFieldPMB(mc); - vlCreateFragmentShaderFramePMB(mc); - vlCreateFragmentShaderFieldPMB(mc); - vlCreateVertexShaderFrameBMB(mc); - vlCreateVertexShaderFieldBMB(mc); - vlCreateFragmentShaderFrameBMB(mc); - vlCreateFragmentShaderFieldBMB(mc); - vlCreateDataBufs(mc); - - return 0; -} - -int vlCreateR16SNormMC -( - struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum vlFormat video_format, - struct vlRender **render -) -{ - struct vlR16SnormMC *mc; - - assert(pipe); - assert(render); - - mc = calloc(1, sizeof(struct vlR16SnormMC)); - - mc->base.vlBegin = &vlBegin; - mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16Snorm; - mc->base.vlEnd = &vlEnd; - mc->base.vlFlush = &vlFlush; - mc->base.vlDestroy = &vlDestroy; - mc->pipe = pipe; - mc->video_width = video_width; - mc->video_height = video_height; - mc->cur_buf = 0; - - vlInit(mc); - - *render = &mc->base; - - return 0; -} diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h deleted file mode 100644 index 9842926bf7..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef vl_r16snorm_mc_h -#define vl_r16snorm_mc_h - -#include "vl_types.h" - -struct pipe_context; -struct vlRender; - -int vlCreateR16SNormMC -( - struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum vlFormat video_format, - struct vlRender **render -); - -#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 650528ed8f..c5a73b2bf2 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -1,7 +1,6 @@ #define VL_INTERNAL #include "vl_r16snorm_mc_buf.h" #include <assert.h> -#include <stdlib.h> #include <pipe/p_context.h> #include <pipe/p_winsys.h> #include <pipe/p_screen.h> @@ -10,6 +9,7 @@ #include <tgsi/tgsi_parse.h> #include <tgsi/tgsi_build.h> #include <util/u_math.h> +#include <util/u_memory.h> #include "vl_render.h" #include "vl_shader_build.h" #include "vl_surface.h" @@ -17,16 +17,7 @@ #include "vl_types.h" #include "vl_defs.h" -/* - * TODO: Dynamically determine number of buf sets to use, based on - * video size and available mem, since we can easily run out of memory - * for high res videos. - * Note: Destroying previous frame's buffers and creating new ones - * doesn't work, since the buffer are not actually destroyed until their - * fence is signalled, and if we render fast enough we will create faster - * than we destroy. - */ -#define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */ +const unsigned int DEFAULT_BUF_ALIGNMENT = 1; enum vlMacroBlockTypeEx { @@ -52,36 +43,79 @@ struct vlFragmentShaderConsts struct vlVertex4f div; }; +struct vlMacroBlockVertexStream0 +{ + struct vlVertex2f pos; + struct vlVertex2f luma_tc; + struct vlVertex2f cb_tc; + struct vlVertex2f cr_tc; +}; + struct vlR16SnormBufferedMC { struct vlRender base; - unsigned int picture_width, picture_height; + unsigned int picture_width; + unsigned int picture_height; enum vlFormat picture_format; + unsigned int macroblocks_per_picture; - unsigned int cur_buf; struct vlSurface *buffered_surface; - struct vlSurface *past_surface, *future_surface; + struct vlSurface *past_surface; + struct vlSurface *future_surface; struct vlVertex2f surface_tex_inv_size; struct vlVertex2f zero_block[3]; unsigned int num_macroblocks; struct vlMpeg2MacroBlock *macroblocks; + struct pipe_surface *tex_surface[3]; + short *texels[3]; struct pipe_context *pipe; struct pipe_viewport_state viewport; struct pipe_framebuffer_state render_target; - struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[NUM_BUF_SETS][5]; - struct pipe_surface *tex_surface[3]; - short *texels[3]; + + union + { + void *all[5]; + struct + { + void *y; + void *cb; + void *cr; + void *ref[2]; + }; + } samplers; + + union + { + struct pipe_texture *all[5]; + struct + { + struct pipe_texture *y; + struct pipe_texture *cb; + struct pipe_texture *cr; + struct pipe_texture *ref[2]; + }; + } textures; + + union + { + struct pipe_vertex_buffer all[3]; + struct + { + struct pipe_vertex_buffer ycbcr; + struct pipe_vertex_buffer ref[2]; + }; + } vertex_bufs; + void *i_vs, *p_vs[2], *b_vs[2]; void *i_fs, *p_fs[2], *b_fs[2]; - struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3]; struct pipe_vertex_element vertex_elems[8]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; + struct pipe_constant_buffer vs_const_buf; + struct pipe_constant_buffer fs_const_buf; }; -static int vlBegin +static inline int vlBegin ( struct vlRender *render ) @@ -360,11 +394,13 @@ static inline int vlGrabMacroBlock (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ } -static inline int vlGrabMacroBlockVB +static inline int vlGenMacroblockVerts ( struct vlR16SnormBufferedMC *mc, struct vlMpeg2MacroBlock *macroblock, - unsigned int pos + unsigned int pos, + struct vlMacroBlockVertexStream0 *ycbcr_vb, + struct vlVertex2f **ref_vb ) { struct vlVertex2f mo_vec[2]; @@ -372,6 +408,7 @@ static inline int vlGrabMacroBlockVB assert(mc); assert(macroblock); + assert(ycbcr_vb); switch (macroblock->mb_type) { @@ -379,12 +416,9 @@ static inline int vlGrabMacroBlockVB { struct vlVertex2f *vb; - vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + pos * 2 * 24; + assert(ref_vb && ref_vb[1]); + + vb = ref_vb[1] + pos * 2 * 24; mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; @@ -411,8 +445,6 @@ static inline int vlGrabMacroBlockVB } } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer); - /* fall-through */ } case vlMacroBlockTypeFwdPredicted: @@ -420,12 +452,9 @@ static inline int vlGrabMacroBlockVB { struct vlVertex2f *vb; - vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + pos * 2 * 24; + assert(ref_vb && ref_vb[0]); + + vb = ref_vb[0] + pos * 2 * 24; if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted) { @@ -469,8 +498,6 @@ static inline int vlGrabMacroBlockVB } } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer); - /* fall-through */ } case vlMacroBlockTypeIntra: @@ -486,20 +513,9 @@ static inline int vlGrabMacroBlockVB mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2) }; - struct vlMacroBlockVertexStream0 - { - struct vlVertex2f pos; - struct vlVertex2f luma_tc; - struct vlVertex2f cb_tc; - struct vlVertex2f cr_tc; - } *vb; + struct vlMacroBlockVertexStream0 *vb; - vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + pos * 24; + vb = ycbcr_vb + pos * 24; SET_BLOCK ( @@ -533,8 +549,6 @@ static inline int vlGrabMacroBlockVB 4, 2, 1, mc->zero_block ); - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer); - break; } default: @@ -555,9 +569,6 @@ static int vlFlush unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0}; unsigned int offset[vlNumMacroBlockExTypes]; unsigned int vb_start = 0; - unsigned int mbw; - unsigned int mbh; - unsigned int num_mb_per_frame; unsigned int i; assert(render); @@ -567,11 +578,7 @@ static int vlFlush if (!mc->buffered_surface) return 0; - mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; - mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; - num_mb_per_frame = mbw * mbh; - - if (mc->num_macroblocks < num_mb_per_frame) + if (mc->num_macroblocks < mc->macroblocks_per_picture) return 0; pipe = mc->pipe; @@ -588,15 +595,39 @@ static int vlFlush for (i = 1; i < vlNumMacroBlockExTypes; ++i) offset[i] = offset[i - 1] + num_macroblocks[i - 1]; - for (i = 0; i < mc->num_macroblocks; ++i) { - enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); + struct vlMacroBlockVertexStream0 *ycbcr_vb; + struct vlVertex2f *ref_vb[2]; + + ycbcr_vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs.ycbcr.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); - vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex]); + for (i = 0; i < 2; ++i) + ref_vb[i] = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs.ref[i].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); - offset[mb_type_ex]++; + for (i = 0; i < mc->num_macroblocks; ++i) + { + enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); + + vlGenMacroblockVerts(mc, &mc->macroblocks[i], offset[mb_type_ex], ycbcr_vb, ref_vb); + + offset[mb_type_ex]++; + } + + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ycbcr.buffer); + for (i = 0; i < 2; ++i) + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ref[i].buffer); } - + for (i = 0; i < 3; ++i) { pipe_surface_unmap(mc->tex_surface[i]); @@ -628,10 +659,10 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeIntra] > 0) { - pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); - pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); + pipe->set_sampler_textures(pipe, 3, mc->textures.all); + pipe->bind_sampler_states(pipe, 3, mc->samplers.all); pipe->bind_vs_state(pipe, mc->i_vs); pipe->bind_fs_state(pipe, mc->i_fs); @@ -641,11 +672,11 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + mc->textures.ref[0] = mc->past_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); pipe->bind_vs_state(pipe, mc->p_vs[0]); pipe->bind_fs_state(pipe, mc->p_fs[0]); @@ -655,11 +686,11 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + mc->textures.ref[0] = mc->past_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); pipe->bind_vs_state(pipe, mc->p_vs[1]); pipe->bind_fs_state(pipe, mc->p_fs[1]); @@ -669,11 +700,11 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + mc->textures.ref[0] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); pipe->bind_vs_state(pipe, mc->p_vs[0]); pipe->bind_fs_state(pipe, mc->p_fs[0]); @@ -683,11 +714,11 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + mc->textures.ref[0] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); pipe->bind_vs_state(pipe, mc->p_vs[1]); pipe->bind_fs_state(pipe, mc->p_fs[1]); @@ -697,12 +728,12 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0) { - pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 8, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; - mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); + mc->textures.ref[0] = mc->past_surface->texture; + mc->textures.ref[1] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures.all); + pipe->bind_sampler_states(pipe, 5, mc->samplers.all); pipe->bind_vs_state(pipe, mc->b_vs[0]); pipe->bind_fs_state(pipe, mc->b_fs[0]); @@ -712,12 +743,12 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 8, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; - mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); + mc->textures.ref[0] = mc->past_surface->texture; + mc->textures.ref[1] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures.all); + pipe->bind_sampler_states(pipe, 5, mc->samplers.all); pipe->bind_vs_state(pipe, mc->b_vs[1]); pipe->bind_fs_state(pipe, mc->b_fs[1]); @@ -726,13 +757,13 @@ static int vlFlush } pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence); + pipe->screen->tex_surface_release(pipe->screen, &mc->render_target.cbufs[0]); for (i = 0; i < 3; ++i) mc->zero_block[i].x = -1.0f; mc->buffered_surface = NULL; mc->num_macroblocks = 0; - mc->cur_buf++; return 0; } @@ -745,6 +776,7 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered ) { struct vlR16SnormBufferedMC *mc; + bool new_surface = false; unsigned int i; assert(render); @@ -756,39 +788,26 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered if (mc->buffered_surface != surface) { vlFlush(&mc->base); - mc->buffered_surface = surface; - mc->past_surface = batch->past_surface; - mc->future_surface = batch->future_surface; - mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; - mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; - - for (i = 0; i < 3; ++i) - { - mc->tex_surface[i] = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUF_SETS][i], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - mc->texels[i] = pipe_surface_map(mc->tex_surface[i], PIPE_BUFFER_USAGE_CPU_WRITE); - } + new_surface = true; } } else + new_surface = true; + + if (new_surface) { mc->buffered_surface = surface; mc->past_surface = batch->past_surface; mc->future_surface = batch->future_surface; mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; - + for (i = 0; i < 3; ++i) { mc->tex_surface[i] = mc->pipe->screen->get_tex_surface ( mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUF_SETS][i], + mc->textures.all[i], 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE ); @@ -802,7 +821,7 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered return 0; } -static int vlEnd +static inline int vlEnd ( struct vlRender *render ) @@ -819,7 +838,7 @@ static int vlDestroy { struct vlR16SnormBufferedMC *mc; struct pipe_context *pipe; - unsigned int h, i; + unsigned int i; assert(render); @@ -827,19 +846,14 @@ static int vlDestroy pipe = mc->pipe; for (i = 0; i < 5; ++i) - pipe->delete_sampler_state(pipe, mc->samplers[i]); + pipe->delete_sampler_state(pipe, mc->samplers.all[i]); - for (h = 0; h < NUM_BUF_SETS; ++h) - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[h][i].buffer); + for (i = 0; i < 3; ++i) + pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs.all[i].buffer); /* Textures 3 & 4 are not created directly, no need to release them here */ - for (i = 0; i < NUM_BUF_SETS; ++i) - { - pipe_texture_release(&mc->textures[i][0]); - pipe_texture_release(&mc->textures[i][1]); - pipe_texture_release(&mc->textures[i][2]); - } + for (i = 0; i < 3; ++i) + pipe_texture_release(&mc->textures.all[i]); pipe->delete_vs_state(pipe, mc->i_vs); pipe->delete_fs_state(pipe, mc->i_fs); @@ -855,8 +869,8 @@ static int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); - free(mc->macroblocks); - free(mc); + FREE(mc->macroblocks); + FREE(mc); return 0; } @@ -882,42 +896,39 @@ static int vlCreateDataBufs { const unsigned int mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; const unsigned int mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; - const unsigned int num_mb_per_frame = mbw * mbh; struct pipe_context *pipe; - unsigned int h, i; + unsigned int i; assert(mc); pipe = mc->pipe; + mc->macroblocks_per_picture = mbw * mbh; /* Create our vertex buffers */ - for (h = 0; h < NUM_BUF_SETS; ++h) + mc->vertex_bufs.ycbcr.pitch = sizeof(struct vlVertex2f) * 4; + mc->vertex_bufs.ycbcr.max_index = 24 * mc->macroblocks_per_picture - 1; + mc->vertex_bufs.ycbcr.buffer_offset = 0; + mc->vertex_bufs.ycbcr.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 * 24 * mc->macroblocks_per_picture + ); + + for (i = 1; i < 3; ++i) { - mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f) * 4; - mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1; - mc->vertex_bufs[h][0].buffer_offset = 0; - mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create + mc->vertex_bufs.all[i].pitch = sizeof(struct vlVertex2f) * 2; + mc->vertex_bufs.all[i].max_index = 24 * mc->macroblocks_per_picture - 1; + mc->vertex_bufs.all[i].buffer_offset = 0; + mc->vertex_bufs.all[i].buffer = pipe->winsys->buffer_create ( pipe->winsys, - 1, + DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 4 * 24 * num_mb_per_frame + sizeof(struct vlVertex2f) * 2 * 24 * mc->macroblocks_per_picture ); - - for (i = 1; i < 3; ++i) - { - mc->vertex_bufs[h][i].pitch = sizeof(struct vlVertex2f) * 2; - mc->vertex_bufs[h][i].max_index = 24 * num_mb_per_frame - 1; - mc->vertex_bufs[h][i].buffer_offset = 0; - mc->vertex_bufs[h][i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame - ); - } } /* Position element */ @@ -973,7 +984,7 @@ static int vlCreateDataBufs mc->vs_const_buf.buffer = pipe->winsys->buffer_create ( pipe->winsys, - 1, + DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_CONSTANT, mc->vs_const_buf.size ); @@ -982,7 +993,7 @@ static int vlCreateDataBufs mc->fs_const_buf.buffer = pipe->winsys->buffer_create ( pipe->winsys, - 1, + DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_CONSTANT, mc->fs_const_buf.size ); @@ -996,7 +1007,7 @@ static int vlCreateDataBufs pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); - mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * num_mb_per_frame); + mc->macroblocks = MALLOC(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture); return 0; } @@ -1016,6 +1027,13 @@ static int vlInit pipe = mc->pipe; + mc->buffered_surface = NULL; + mc->past_surface = NULL; + mc->future_surface = NULL; + for (i = 0; i < 3; ++i) + mc->zero_block[i].x = -1.0f; + mc->num_macroblocks = 0; + /* For MC we render to textures, which are rounded up to nearest POT */ mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width); mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height); @@ -1057,7 +1075,7 @@ static int vlInit /*sampler.max_lod = ;*/ /*sampler.border_color[i] = ;*/ /*sampler.max_anisotropy = ;*/ - mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler); + mc->samplers.all[i] = pipe->create_sampler_state(pipe, &sampler); } memset(&template, 0, sizeof(struct pipe_texture)); @@ -1069,10 +1087,9 @@ static int vlInit template.depth[0] = 1; template.compressed = 0; pf_get_block(template.format, &template.block); - template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; - for (i = 0; i < NUM_BUF_SETS; ++i) - mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); + mc->textures.y = pipe->screen->texture_create(pipe->screen, &template); if (mc->picture_format == vlFormatYCbCr420) { @@ -1082,13 +1099,10 @@ static int vlInit else if (mc->picture_format == vlFormatYCbCr422) template.height[0] = vlRoundUpPOT(mc->picture_height / 2); - for (i = 0; i < NUM_BUF_SETS; ++i) - { - mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); - mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); - } + mc->textures.cb = pipe->screen->texture_create(pipe->screen, &template); + mc->textures.cr = pipe->screen->texture_create(pipe->screen, &template); - /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ + /* textures.all[3] & textures.all[4] are assigned from vlSurfaces for P and B macroblocks at render time */ vlCreateVertexShaderIMB(mc); vlCreateFragmentShaderIMB(mc); @@ -1114,13 +1128,12 @@ int vlCreateR16SNormBufferedMC struct vlRender **render ) { - struct vlR16SnormBufferedMC *mc; - unsigned int i; + struct vlR16SnormBufferedMC *mc; assert(pipe); assert(render); - mc = calloc(1, sizeof(struct vlR16SnormBufferedMC)); + mc = CALLOC_STRUCT(vlR16SnormBufferedMC); mc->base.vlBegin = &vlBegin; mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered; @@ -1131,14 +1144,6 @@ int vlCreateR16SNormBufferedMC mc->picture_width = picture_width; mc->picture_height = picture_height; - mc->cur_buf = 0; - mc->buffered_surface = NULL; - mc->past_surface = NULL; - mc->future_surface = NULL; - for (i = 0; i < 3; ++i) - mc->zero_block[i].x = -1.0f; - mc->num_macroblocks = 0; - vlInit(mc); *render = &mc->base; diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.c b/src/gallium/state_trackers/g3dvl/vl_screen.c index 484f63b0d4..ade8643a66 100644 --- a/src/gallium/state_trackers/g3dvl/vl_screen.c +++ b/src/gallium/state_trackers/g3dvl/vl_screen.c @@ -1,7 +1,7 @@ #define VL_INTERNAL #include "vl_screen.h" #include <assert.h> -#include <stdlib.h> +#include <util/u_memory.h> int vlCreateScreen ( @@ -17,7 +17,7 @@ int vlCreateScreen assert(pscreen); assert(vl_screen); - scrn = calloc(1, sizeof(struct vlScreen)); + scrn = CALLOC_STRUCT(vlScreen); if (!scrn) return 1; @@ -37,7 +37,7 @@ int vlDestroyScreen { assert(screen); - free(screen); + FREE(screen); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 076bd40d41..911469f966 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -1,11 +1,11 @@ #define VL_INTERNAL #include "vl_surface.h" #include <assert.h> -#include <stdlib.h> #include <string.h> #include <pipe/p_screen.h> #include <pipe/p_state.h> #include <pipe/p_inlines.h> +#include <util/u_memory.h> #include <vl_winsys.h> #include "vl_screen.h" #include "vl_context.h" @@ -28,7 +28,7 @@ int vlCreateSurface assert(screen); assert(surface); - sfc = calloc(1, sizeof(struct vlSurface)); + sfc = CALLOC_STRUCT(vlSurface); if (!sfc) return 1; @@ -64,7 +64,7 @@ int vlDestroySurface assert(surface); pipe_texture_release(&surface->texture); - free(surface); + FREE(surface); return 0; } diff --git a/src/gallium/winsys/drm/nouveau/Makefile b/src/gallium/winsys/drm/nouveau/Makefile index 81562ca78d..b5735329ec 100644 --- a/src/gallium/winsys/drm/nouveau/Makefile +++ b/src/gallium/winsys/drm/nouveau/Makefile @@ -1,46 +1,25 @@ - TOP = ../../../../.. include $(TOP)/configs/current -LIBNAME = nouveau_dri.so - -MINIGLX_SOURCES = - -PIPE_DRIVERS = \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/nv04/libnv04.a \ - $(TOP)/src/gallium/drivers/nv10/libnv10.a \ - $(TOP)/src/gallium/drivers/nv20/libnv20.a \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ - $(TOP)/src/gallium/drivers/nv50/libnv50.a - -DRIVER_SOURCES = \ - nouveau_bo.c \ - nouveau_channel.c \ - nouveau_context.c \ - nouveau_device.c \ - nouveau_dma.c \ - nouveau_fence.c \ - nouveau_grobj.c \ - nouveau_lock.c \ - nouveau_notifier.c \ - nouveau_pushbuf.c \ - nouveau_resource.c \ - nouveau_screen.c \ - nouveau_swapbuffers.c \ - nouveau_winsys.c \ - nouveau_winsys_pipe.c \ - nouveau_winsys_softpipe.c \ - nv04_surface.c \ - nv50_surface.c - -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -include ../Makefile.template - -symlinks: + +SUBDIRS = common dri + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` + rm -f `find . -name depend` + + +# Dummy install target +install: diff --git a/src/gallium/winsys/drm/nouveau/common/Makefile b/src/gallium/winsys/drm/nouveau/common/Makefile new file mode 100644 index 0000000000..06f558959d --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/Makefile @@ -0,0 +1,32 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = nouveaudrm + +C_SOURCES = \ + nouveau_bo.c \ + nouveau_channel.c \ + nouveau_context.c \ + nouveau_device.c \ + nouveau_dma.c \ + nouveau_fence.c \ + nouveau_grobj.c \ + nouveau_lock.c \ + nouveau_notifier.c \ + nouveau_pushbuf.c \ + nouveau_resource.c \ + nouveau_screen.c \ + nouveau_winsys.c \ + nouveau_winsys_pipe.c \ + nouveau_winsys_softpipe.c \ + nv04_surface.c \ + nv50_surface.c + + +include ./Makefile.template + +DRIVER_DEFINES = $(shell pkg-config libdrm --cflags \ + && pkg-config libdrm --atleast-version=2.3.1 \ + && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") +symlinks: + diff --git a/src/gallium/winsys/drm/nouveau/common/Makefile.template b/src/gallium/winsys/drm/nouveau/common/Makefile.template new file mode 100644 index 0000000000..e40836e0a8 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/Makefile.template @@ -0,0 +1,59 @@ +# -*-makefile-*- + +COMMON_SOURCES = + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + + +### Include directories +INCLUDES = \ + -I. \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/include \ + $(DRIVER_INCLUDES) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend symlinks $(LIBNAME) + + +$(LIBNAME): $(OBJECTS) Makefile Makefile.template + $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) + + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ + $(ASM_SOURCES) 2> /dev/null + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean:: + -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) + -rm -f depend depend.bak + + +include depend diff --git a/src/gallium/winsys/drm/nouveau/nouveau_bo.c b/src/gallium/winsys/drm/nouveau/common/nouveau_bo.c index b5942994d9..76b98bed67 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_bo.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_bo.c @@ -273,6 +273,40 @@ nouveau_bo_del(struct nouveau_bo **bo) } int +nouveau_bo_busy(struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + struct nouveau_fence *fence; + + if (!nvbo) + return -EINVAL; + + /* If the buffer is pending it must be busy, unless + * both are RD, in which case we can allow access */ + if (nvbo->pending) { + if ((nvbo->pending->flags & NOUVEAU_BO_RDWR) == NOUVEAU_BO_RD && + (flags & NOUVEAU_BO_RDWR) == NOUVEAU_BO_RD) + return 0; + else + return 1; + } + + if (flags & NOUVEAU_BO_WR) + fence = nvbo->fence; + else + fence = nvbo->wr_fence; + + /* If the buffer is not pending and doesn't have a fence + * that conflicts with our flags then it can't be busy + */ + if (!fence) + return 0; + else + /* If the fence is signalled the buffer is not busy, else is busy */ + return !nouveau_fence(fence)->signalled; +} + +int nouveau_bo_map(struct nouveau_bo *bo, uint32_t flags) { struct nouveau_bo_priv *nvbo = nouveau_bo(bo); diff --git a/src/gallium/winsys/drm/nouveau/nouveau_channel.c b/src/gallium/winsys/drm/nouveau/common/nouveau_channel.c index 3b4dcd1ecf..b7298131c1 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_channel.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_channel.c @@ -23,7 +23,7 @@ #include <stdlib.h> #include <string.h> #include <errno.h> - +#include <util/u_memory.h> #include "nouveau_drmif.h" #include "nouveau_dma.h" @@ -38,7 +38,7 @@ nouveau_channel_alloc(struct nouveau_device *dev, uint32_t fb_ctxdma, if (!nvdev || !chan || *chan) return -EINVAL; - nvchan = calloc(1, sizeof(struct nouveau_channel_priv)); + nvchan = CALLOC_STRUCT(nouveau_channel_priv); if (!nvchan) return -ENOMEM; nvchan->base.device = dev; @@ -48,7 +48,7 @@ nouveau_channel_alloc(struct nouveau_device *dev, uint32_t fb_ctxdma, ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_CHANNEL_ALLOC, &nvchan->drm, sizeof(nvchan->drm)); if (ret) { - free(nvchan); + FREE(nvchan); return ret; } @@ -111,16 +111,16 @@ nouveau_channel_free(struct nouveau_channel **chan) nvchan = nouveau_channel(*chan); *chan = NULL; nvdev = nouveau_device(nvchan->base.device); - + FIRE_RING_CH(&nvchan->base); nouveau_grobj_free(&nvchan->base.vram); nouveau_grobj_free(&nvchan->base.gart); nouveau_grobj_free(&nvchan->base.nullobj); + FREE(nvchan->pb.buffers); + FREE(nvchan->pb.relocs); cf.channel = nvchan->drm.channel; drmCommandWrite(nvdev->fd, DRM_NOUVEAU_CHANNEL_FREE, &cf, sizeof(cf)); - free(nvchan); + FREE(nvchan); } - - diff --git a/src/gallium/winsys/drm/nouveau/nouveau_context.c b/src/gallium/winsys/drm/nouveau/common/nouveau_context.c index 74413c408f..2f245046d4 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_context.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_context.c @@ -1,28 +1,13 @@ -#include "main/glheader.h" -#include "glapi/glthread.h" -#include <GL/internal/glcore.h> -#include "utils.h" - -#include "state_tracker/st_public.h" -#include "state_tracker/st_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_screen.h" - +#include <pipe/p_defines.h> +#include <pipe/p_context.h> +#include <pipe/p_screen.h> +#include <util/u_memory.h> #include "nouveau_context.h" #include "nouveau_dri.h" #include "nouveau_local.h" #include "nouveau_screen.h" #include "nouveau_winsys_pipe.h" -#ifdef DEBUG -static const struct dri_debug_control debug_control[] = { - { "bo", DEBUG_BO }, - { NULL, 0 } -}; -int __nouveau_debug = 0; -#endif - static void nouveau_channel_context_destroy(struct nouveau_channel_context *nvc) { @@ -87,24 +72,17 @@ nouveau_channel_context_create(struct nouveau_device *dev) return nvc; } -GLboolean -nouveau_context_create(const __GLcontextModes *glVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) +int +nouveau_context_init(struct nouveau_screen *nv_screen, + drm_context_t hHWContext, drmLock *sarea_lock, + struct nouveau_context *nv_share, + struct nouveau_context *nv) { - __DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv; - struct nouveau_screen *nv_screen = driScrnPriv->private; - struct nouveau_context *nv = CALLOC_STRUCT(nouveau_context); struct pipe_context *pipe = NULL; - struct st_context *st_share = NULL; struct nouveau_channel_context *nvc = NULL; struct nouveau_device *dev = nv_screen->device; int i; - if (sharedContextPrivate) { - st_share = ((struct nouveau_context *)sharedContextPrivate)->st; - } - switch (dev->chipset & 0xf0) { case 0x10: case 0x20: @@ -121,27 +99,18 @@ nouveau_context_create(const __GLcontextModes *glVis, break; default: NOUVEAU_ERR("Unsupported chipset: NV%02x\n", dev->chipset); - return GL_FALSE; + return 1; } - driContextPriv->driverPrivate = (void *)nv; nv->nv_screen = nv_screen; - nv->dri_screen = driScrnPriv; { struct nouveau_device_priv *nvdev = nouveau_device(dev); - nvdev->ctx = driContextPriv->hHWContext; - nvdev->lock = (drmLock *)&driScrnPriv->pSAREA->lock; + nvdev->ctx = hHWContext; + nvdev->lock = sarea_lock; } - driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, - nv->dri_screen->myNum, "nouveau"); -#ifdef DEBUG - __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), - debug_control); -#endif - /*XXX: Hack up a fake region and buffer object for front buffer. * This will go away with TTM, replaced with a simple reference * of the front buffer handle passed to us by the DDX. @@ -185,12 +154,8 @@ nouveau_context_create(const __GLcontextModes *glVis, * a single process. */ nvc = nv_screen->nvc; - if (!nvc && st_share) { - struct nouveau_context *snv = st_share->pipe->priv; - if (snv) { - nvc = snv->nvc; - } - } + if (!nvc && nv_share) + nvc = nv_share->nvc; /*XXX: temporary - disable multi-context/single-channel on pre-NV4x */ switch (dev->chipset & 0xf0) { @@ -211,7 +176,7 @@ nouveau_context_create(const __GLcontextModes *glVis, nvc = nouveau_channel_context_create(dev); if (!nvc) { NOUVEAU_ERR("Failed initialising GPU context\n"); - return GL_FALSE; + return 1; } nv_screen->nvc = nvc; } @@ -241,11 +206,11 @@ nouveau_context_create(const __GLcontextModes *glVis, case 0x80: case 0x90: if (nouveau_surface_init_nv50(nv)) - return GL_FALSE; + return 1; break; default: if (nouveau_surface_init_nv04(nv)) - return GL_FALSE; + return 1; break; } @@ -268,26 +233,22 @@ nouveau_context_create(const __GLcontextModes *glVis, pipe = nouveau_create_softpipe(nv); if (!pipe) { NOUVEAU_ERR("Error creating pipe, bailing\n"); - return GL_FALSE; + return 1; } } pipe->priv = nv; - nv->st = st_create_context(pipe, glVis, st_share); - return GL_TRUE; + + return 0; } void -nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) +nouveau_context_cleanup(struct nouveau_context *nv) { - struct nouveau_context *nv = driContextPriv->driverPrivate; struct nouveau_channel_context *nvc = nv->nvc; assert(nv); - st_finish(nv->st); - st_destroy_context(nv->st); - if (nv->pctx_id >= 0) { nvc->pctx[nv->pctx_id] = NULL; if (--nvc->refcount <= 0) { @@ -295,52 +256,7 @@ nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) nv->nv_screen->nvc = NULL; } } - - free(nv); -} - -GLboolean -nouveau_context_bind(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv) -{ - struct nouveau_context *nv; - struct nouveau_framebuffer *draw, *read; - - if (!driContextPriv) { - st_make_current(NULL, NULL, NULL); - return GL_TRUE; - } - - nv = driContextPriv->driverPrivate; - draw = driDrawPriv->driverPrivate; - read = driReadPriv->driverPrivate; - - st_make_current(nv->st, draw->stfb, read->stfb); - - if ((nv->dri_drawable != driDrawPriv) || - (nv->last_stamp != driDrawPriv->lastStamp)) { - nv->dri_drawable = driDrawPriv; - st_resize_framebuffer(draw->stfb, driDrawPriv->w, - driDrawPriv->h); - nv->last_stamp = driDrawPriv->lastStamp; - } - - if (driDrawPriv != driReadPriv) { - st_resize_framebuffer(read->stfb, driReadPriv->w, - driReadPriv->h); - } - - return GL_TRUE; -} - -GLboolean -nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) -{ - struct nouveau_context *nv = driContextPriv->driverPrivate; - (void)nv; - - st_flush(nv->st, 0, NULL); - return GL_TRUE; + + /* XXX: Who cleans up the pipe? */ } diff --git a/src/gallium/winsys/drm/nouveau/nouveau_context.h b/src/gallium/winsys/drm/nouveau/common/nouveau_context.h index 77e2147a2c..b1bdb01bdf 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_context.h +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_context.h @@ -1,17 +1,10 @@ #ifndef __NOUVEAU_CONTEXT_H__ #define __NOUVEAU_CONTEXT_H__ -#include "dri_util.h" -#include "xmlconfig.h" - #include "nouveau/nouveau_winsys.h" #include "nouveau_drmif.h" #include "nouveau_dma.h" -struct nouveau_framebuffer { - struct st_framebuffer *stfb; -}; - struct nouveau_channel_context { struct pipe_screen *pscreen; int refcount; @@ -41,16 +34,7 @@ struct nouveau_channel_context { }; struct nouveau_context { - struct st_context *st; - - /* DRI stuff */ - __DRIscreenPrivate *dri_screen; - __DRIdrawablePrivate *dri_drawable; - unsigned int last_stamp; - driOptionCache dri_option_cache; - drm_context_t drm_context; - drmLock drm_lock; - GLboolean locked; + int locked; struct nouveau_screen *nv_screen; struct pipe_surface *frontbuffer; @@ -76,26 +60,11 @@ struct nouveau_context { unsigned, unsigned, unsigned, unsigned, unsigned); }; -extern GLboolean nouveau_context_create(const __GLcontextModes *, - __DRIcontextPrivate *, void *); -extern void nouveau_context_destroy(__DRIcontextPrivate *); -extern GLboolean nouveau_context_bind(__DRIcontextPrivate *, - __DRIdrawablePrivate *draw, - __DRIdrawablePrivate *read); -extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *); - -#ifdef DEBUG -extern int __nouveau_debug; - -#define DEBUG_BO (1 << 0) - -#define DBG(flag, ...) do { \ - if (__nouveau_debug & (DEBUG_##flag)) \ - NOUVEAU_ERR(__VA_ARGS__); \ -} while(0) -#else -#define DBG(flag, ...) -#endif +extern int nouveau_context_init(struct nouveau_screen *nv_screen, + drm_context_t hHWContext, drmLock *sarea_lock, + struct nouveau_context *nv_share, + struct nouveau_context *nv); +extern void nouveau_context_cleanup(struct nouveau_context *nv); extern void LOCK_HARDWARE(struct nouveau_context *); extern void UNLOCK_HARDWARE(struct nouveau_context *); @@ -110,4 +79,8 @@ extern int nouveau_surface_init_nv50(struct nouveau_context *); extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int); extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *); +/* Must be provided by clients of common code */ +extern void +nouveau_contended_lock(struct nouveau_context *nv); + #endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_device.c b/src/gallium/winsys/drm/nouveau/common/nouveau_device.c index 0b452fcd02..92b57b834b 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_device.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_device.c @@ -23,7 +23,7 @@ #include <stdio.h> #include <stdlib.h> #include <errno.h> - +#include <util/u_memory.h> #include "nouveau_drmif.h" int @@ -36,7 +36,7 @@ nouveau_device_open_existing(struct nouveau_device **dev, int close, if (!dev || *dev) return -EINVAL; - nvdev = calloc(1, sizeof(*nvdev)); + nvdev = CALLOC_STRUCT(nouveau_device_priv); if (!nvdev) return -ENOMEM; nvdev->fd = fd; @@ -112,7 +112,7 @@ nouveau_device_close(struct nouveau_device **dev) drmDestroyContext(nvdev->fd, nvdev->ctx); drmClose(nvdev->fd); } - free(nvdev); + FREE(nvdev); } int diff --git a/src/gallium/winsys/drm/nouveau/nouveau_dma.c b/src/gallium/winsys/drm/nouveau/common/nouveau_dma.c index f8a8ba04f6..f8a8ba04f6 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_dma.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_dma.c diff --git a/src/gallium/winsys/drm/nouveau/nouveau_dma.h b/src/gallium/winsys/drm/nouveau/common/nouveau_dma.h index cfa6d26e82..cfa6d26e82 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_dma.h +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_dma.h diff --git a/src/gallium/winsys/drm/nouveau/nouveau_dri.h b/src/gallium/winsys/drm/nouveau/common/nouveau_dri.h index 1207c2d609..1207c2d609 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_dri.h +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_dri.h diff --git a/src/gallium/winsys/drm/nouveau/nouveau_drmif.h b/src/gallium/winsys/drm/nouveau/common/nouveau_drmif.h index dcd6a5eb0a..5f72800676 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_drmif.h +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_drmif.h @@ -287,6 +287,9 @@ extern void nouveau_bo_del(struct nouveau_bo **); extern int +nouveau_bo_busy(struct nouveau_bo *bo, uint32_t flags); + +extern int nouveau_bo_map(struct nouveau_bo *, uint32_t flags); extern void diff --git a/src/gallium/winsys/drm/nouveau/nouveau_fence.c b/src/gallium/winsys/drm/nouveau/common/nouveau_fence.c index e7b0b4ff07..e7b0b4ff07 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_fence.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_fence.c diff --git a/src/gallium/winsys/drm/nouveau/nouveau_grobj.c b/src/gallium/winsys/drm/nouveau/common/nouveau_grobj.c index 51523897d5..fb430a25b8 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_grobj.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_grobj.c @@ -22,7 +22,7 @@ #include <stdlib.h> #include <errno.h> - +#include <util/u_memory.h> #include "nouveau_drmif.h" int @@ -37,7 +37,7 @@ nouveau_grobj_alloc(struct nouveau_channel *chan, uint32_t handle, if (!nvdev || !grobj || *grobj) return -EINVAL; - nvgrobj = calloc(1, sizeof(*nvgrobj)); + nvgrobj = CALLOC_STRUCT(nouveau_grobj_priv); if (!nvgrobj) return -ENOMEM; nvgrobj->base.channel = chan; @@ -67,7 +67,7 @@ nouveau_grobj_ref(struct nouveau_channel *chan, uint32_t handle, if (!chan || !grobj || *grobj) return -EINVAL; - nvgrobj = calloc(1, sizeof(struct nouveau_grobj_priv)); + nvgrobj = CALLOC_STRUCT(nouveau_grobj_priv); if (!nvgrobj) return -ENOMEM; nvgrobj->base.channel = chan; @@ -102,6 +102,6 @@ nouveau_grobj_free(struct nouveau_grobj **grobj) drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, &f, sizeof(f)); } - free(nvgrobj); + FREE(nvgrobj); } diff --git a/src/gallium/winsys/drm/nouveau/nouveau_local.h b/src/gallium/winsys/drm/nouveau/common/nouveau_local.h index e878a40803..877c7a8c47 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_local.h +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_local.h @@ -5,8 +5,6 @@ #include "nouveau_winsys_pipe.h" #include <stdio.h> -struct pipe_buffer; - /* Debug output */ #define NOUVEAU_MSG(fmt, args...) do { \ fprintf(stdout, "nouveau: "fmt, ##args); \ diff --git a/src/gallium/winsys/drm/nouveau/nouveau_lock.c b/src/gallium/winsys/drm/nouveau/common/nouveau_lock.c index 9adb9ac854..e8cf051ed9 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_lock.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_lock.c @@ -25,34 +25,11 @@ * **************************************************************************/ -#include "main/glheader.h" -#include "glapi/glthread.h" -#include <GL/internal/glcore.h> - +#include <pipe/p_thread.h> #include "nouveau_context.h" #include "nouveau_screen.h" -_glthread_DECLARE_STATIC_MUTEX( lockMutex ); - -static void -nouveau_contended_lock(struct nouveau_context *nv, GLuint flags) -{ - __DRIdrawablePrivate *dPriv = nv->dri_drawable; - __DRIscreenPrivate *sPriv = nv->dri_screen; - struct nouveau_screen *nv_screen = nv->nv_screen; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - drmGetLock(nvdev->fd, nvdev->ctx, flags); - - /* If the window moved, may need to set a new cliprect now. - * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: - */ - if (dPriv) - DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); -} +pipe_static_mutex(lockMutex); /* Lock the hardware and validate our state. */ @@ -64,20 +41,21 @@ LOCK_HARDWARE(struct nouveau_context *nv) struct nouveau_device_priv *nvdev = nouveau_device(dev); char __ret=0; - _glthread_LOCK_MUTEX(lockMutex); assert(!nv->locked); - + pipe_mutex_lock(lockMutex); + DRM_CAS(nvdev->lock, nvdev->ctx, (DRM_LOCK_HELD | nvdev->ctx), __ret); - - if (__ret) - nouveau_contended_lock(nv, 0); - nv->locked = GL_TRUE; -} + if (__ret) { + drmGetLock(nvdev->fd, nvdev->ctx, 0); + nouveau_contended_lock(nv); + } + nv->locked = 1; +} - /* Unlock the hardware using the global current context - */ +/* Unlock the hardware using the global current context + */ void UNLOCK_HARDWARE(struct nouveau_context *nv) { @@ -86,9 +64,9 @@ UNLOCK_HARDWARE(struct nouveau_context *nv) struct nouveau_device_priv *nvdev = nouveau_device(dev); assert(nv->locked); - nv->locked = GL_FALSE; + nv->locked = 0; DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); - _glthread_UNLOCK_MUTEX(lockMutex); + pipe_mutex_unlock(lockMutex); } diff --git a/src/gallium/winsys/drm/nouveau/nouveau_notifier.c b/src/gallium/winsys/drm/nouveau/common/nouveau_notifier.c index 01e8f38440..01e8f38440 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_notifier.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_notifier.c diff --git a/src/gallium/winsys/drm/nouveau/nouveau_pushbuf.c b/src/gallium/winsys/drm/nouveau/common/nouveau_pushbuf.c index 815046ba85..7c094eb795 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_pushbuf.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_pushbuf.c @@ -23,7 +23,7 @@ #include <stdlib.h> #include <errno.h> #include <assert.h> - +#include <util/u_memory.h> #include "nouveau_drmif.h" #include "nouveau_dma.h" @@ -97,9 +97,9 @@ nouveau_pushbuf_init(struct nouveau_channel *chan) nouveau_pushbuf_space(chan, 0); chan->pushbuf = &nvchan->pb.base; - nvchan->pb.buffers = calloc(NOUVEAU_PUSHBUF_MAX_BUFFERS, + nvchan->pb.buffers = CALLOC(NOUVEAU_PUSHBUF_MAX_BUFFERS, sizeof(struct nouveau_pushbuf_bo)); - nvchan->pb.relocs = calloc(NOUVEAU_PUSHBUF_MAX_RELOCS, + nvchan->pb.relocs = CALLOC(NOUVEAU_PUSHBUF_MAX_RELOCS, sizeof(struct nouveau_pushbuf_reloc)); return 0; } @@ -268,4 +268,3 @@ nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr, *(uint32_t *)ptr = nouveau_pushbuf_calc_reloc(bo, r); return 0; } - diff --git a/src/gallium/winsys/drm/nouveau/nouveau_resource.c b/src/gallium/winsys/drm/nouveau/common/nouveau_resource.c index 3bbcb5c45e..766fd279fe 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_resource.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_resource.c @@ -22,7 +22,7 @@ #include <stdlib.h> #include <errno.h> - +#include <util/u_memory.h> #include "nouveau_drmif.h" #include "nouveau_local.h" @@ -32,7 +32,7 @@ nouveau_resource_init(struct nouveau_resource **heap, { struct nouveau_resource *r; - r = calloc(1, sizeof(struct nouveau_resource)); + r = CALLOC_STRUCT(nouveau_resource); if (!r) return 1; @@ -53,7 +53,7 @@ nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, while (heap) { if (!heap->in_use && heap->size >= size) { - r = calloc(1, sizeof(struct nouveau_resource)); + r = CALLOC_STRUCT(nouveau_resource); if (!r) return 1; @@ -73,7 +73,7 @@ nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, *res = r; return 0; } - + heap = heap->next; } @@ -110,7 +110,7 @@ nouveau_resource_free(struct nouveau_resource **res) if (r->next) r->next->prev = r->prev; r->prev->size += r->size; - free(r); + FREE(r); } - + } diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c new file mode 100644 index 0000000000..422fbf0207 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c @@ -0,0 +1,31 @@ +#include <util/u_memory.h> +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" + +int +nouveau_screen_init(struct nouveau_dri *nv_dri, int dev_fd, + struct nouveau_screen *nv_screen) +{ + int ret; + + ret = nouveau_device_open_existing(&nv_screen->device, 0, + dev_fd, 0); + if (ret) { + NOUVEAU_ERR("Failed opening nouveau device: %d\n", ret); + return 1; + } + + nv_screen->front_offset = nv_dri->front_offset; + nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); + nv_screen->front_cpp = nv_dri->bpp / 8; + nv_screen->front_height = nv_dri->height; + + return 0; +} + +void +nouveau_screen_cleanup(struct nouveau_screen *nv_screen) +{ + nouveau_device_close(&nv_screen->device); +} diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h new file mode 100644 index 0000000000..3e68e219d8 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h @@ -0,0 +1,27 @@ +#ifndef __NOUVEAU_SCREEN_H__ +#define __NOUVEAU_SCREEN_H__ + +#include <stdint.h> + +struct nouveau_device; +struct nouveau_dri; + +struct nouveau_screen { + struct nouveau_device *device; + + uint32_t front_offset; + uint32_t front_pitch; + uint32_t front_cpp; + uint32_t front_height; + + void *nvc; +}; + +int +nouveau_screen_init(struct nouveau_dri *nv_dri, int dev_fd, + struct nouveau_screen *nv_screen); + +void +nouveau_screen_cleanup(struct nouveau_screen *nv_screen); + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_winsys.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c index 364340e1d3..364340e1d3 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c diff --git a/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c index 5276806de6..6895137506 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c @@ -1,55 +1,29 @@ -#include "pipe/p_winsys.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" - -#include "util/u_memory.h" - +#include <pipe/p_winsys.h> +#include <pipe/p_defines.h> +#include <pipe/p_inlines.h> +#include <util/u_memory.h> #include "nouveau_context.h" #include "nouveau_local.h" #include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" #include "nouveau_winsys_pipe.h" -static void -nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, - void *context_private) -{ - struct nouveau_context *nv = context_private; - __DRIdrawablePrivate *dPriv = nv->dri_drawable; - - nouveau_copy_buffer(dPriv, surf, NULL); -} - static const char * nouveau_get_name(struct pipe_winsys *pws) { return "Nouveau/DRI"; } -static struct pipe_buffer * -nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, - unsigned usage, unsigned size) +static uint32_t +nouveau_flags_from_usage(struct nouveau_context *nv, unsigned usage) { - struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; - struct nouveau_context *nv = nvpws->nv; struct nouveau_device *dev = nv->nv_screen->device; - struct nouveau_pipe_buffer *nvbuf; - uint32_t flags; - - nvbuf = calloc(1, sizeof(*nvbuf)); - if (!nvbuf) - return NULL; - nvbuf->base.refcount = 1; - nvbuf->base.alignment = alignment; - nvbuf->base.usage = usage; - nvbuf->base.size = size; - - flags = NOUVEAU_BO_LOCAL; + uint32_t flags = NOUVEAU_BO_LOCAL; if (usage & PIPE_BUFFER_USAGE_PIXEL) { if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) flags |= NOUVEAU_BO_GART; - flags |= NOUVEAU_BO_VRAM; + if (!(usage & PIPE_BUFFER_USAGE_CPU_READ_WRITE)) + flags |= NOUVEAU_BO_VRAM; switch (dev->chipset & 0xf0) { case 0x50: @@ -74,8 +48,31 @@ nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, flags |= NOUVEAU_BO_GART; } + return flags; +} + +static struct pipe_buffer * +nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, + unsigned usage, unsigned size) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_context *nv = nvpws->nv; + struct nouveau_device *dev = nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + uint32_t flags; + + nvbuf = CALLOC_STRUCT(nouveau_pipe_buffer); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.alignment = alignment; + nvbuf->base.usage = usage; + nvbuf->base.size = size; + + flags = nouveau_flags_from_usage(nv, flags); + if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { - free(nvbuf); + FREE(nvbuf); return NULL; } @@ -89,14 +86,14 @@ nouveau_pipe_bo_user_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) struct nouveau_device *dev = nvpws->nv->nv_screen->device; struct nouveau_pipe_buffer *nvbuf; - nvbuf = calloc(1, sizeof(*nvbuf)); + nvbuf = CALLOC_STRUCT(nouveau_pipe_buffer); if (!nvbuf) return NULL; nvbuf->base.refcount = 1; nvbuf->base.size = bytes; if (nouveau_bo_user(dev, ptr, bytes, &nvbuf->bo)) { - free(nvbuf); + FREE(nvbuf); return NULL; } @@ -109,7 +106,7 @@ nouveau_pipe_bo_del(struct pipe_winsys *ws, struct pipe_buffer *buf) struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); nouveau_bo_del(&nvbuf->bo); - free(nvbuf); + FREE(nvbuf); } static void * @@ -124,6 +121,26 @@ nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf, if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) map_flags |= NOUVEAU_BO_WR; + /* XXX: Technically incorrect. If the client maps a buffer for write-only + * and leaves part of the buffer untouched it probably expects those parts + * to remain intact. This is violated because we allocate a whole new buffer + * and don't copy the previous buffer's contents, so this optimization is + * only valid if the client intends to overwrite the whole buffer. + */ + if ((map_flags & NOUVEAU_BO_RDWR) == NOUVEAU_BO_WR && + !nouveau_bo_busy(nvbuf->bo, map_flags)) { + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_context *nv = nvpws->nv; + struct nouveau_device *dev = nv->nv_screen->device; + struct nouveau_bo *rename; + uint32_t flags = nouveau_flags_from_usage(nv, buf->usage); + + if (!nouveau_bo_new(dev, flags, buf->alignment, buf->size, &rename)) { + nouveau_bo_del(&nvbuf->bo); + nvbuf->bo = rename; + } + } + if (nouveau_bo_map(nvbuf->bo, map_flags)) return NULL; return nvbuf->bo->map; @@ -175,6 +192,12 @@ nouveau_pipe_fence_finish(struct pipe_winsys *ws, return nouveau_fence_wait(&ref); } +static void +nouveau_destroy(struct pipe_winsys *pws) +{ + FREE(pws); +} + struct pipe_winsys * nouveau_create_pipe_winsys(struct nouveau_context *nv) { @@ -200,7 +223,7 @@ nouveau_create_pipe_winsys(struct nouveau_context *nv) pws->fence_finish = nouveau_pipe_fence_finish; pws->get_name = nouveau_get_name; + pws->destroy = nouveau_destroy; return &nvpws->pws; } - diff --git a/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.h b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h index 6a03ac0d77..14c728690d 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.h +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h @@ -31,4 +31,9 @@ nouveau_create_softpipe(struct nouveau_context *nv); struct pipe_context * nouveau_pipe_create(struct nouveau_context *nv); +/* Must be provided by clients of common code */ +extern void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private); + #endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_winsys_softpipe.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c index 68aade829d..04def600f4 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_winsys_softpipe.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c @@ -29,12 +29,12 @@ * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> */ -#include "imports.h" - -#include "pipe/p_defines.h" -#include "pipe/p_format.h" -#include "softpipe/sp_winsys.h" - +#include <pipe/p_winsys.h> +#include <pipe/p_screen.h> +#include <pipe/p_defines.h> +#include <pipe/p_format.h> +#include <softpipe/sp_winsys.h> +#include <util/u_memory.h> #include "nouveau_context.h" #include "nouveau_winsys_pipe.h" @@ -48,7 +48,7 @@ struct nouveau_softpipe_winsys { */ static boolean nouveau_is_format_supported(struct softpipe_winsys *sws, - enum pipe_format format) + enum pipe_format format) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -68,19 +68,34 @@ nouveau_create_softpipe(struct nouveau_context *nv) struct nouveau_softpipe_winsys *nvsws; struct pipe_screen *pscreen; struct pipe_winsys *ws; + struct pipe_context *pipe; ws = nouveau_create_pipe_winsys(nv); if (!ws) return NULL; pscreen = softpipe_create_screen(ws); - + if (!pscreen) { + ws->destroy(ws); + return NULL; + } nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); - if (!nvsws) + if (!nvsws) { + ws->destroy(ws); + pscreen->destroy(pscreen); return NULL; + } nvsws->sws.is_format_supported = nouveau_is_format_supported; nvsws->nv = nv; - return softpipe_create(pscreen, ws, &nvsws->sws); + pipe = softpipe_create(pscreen, ws, &nvsws->sws); + if (!pipe) { + ws->destroy(ws); + pscreen->destroy(pscreen); + FREE(nvsws); + return NULL; + } + + return pipe; } diff --git a/src/gallium/winsys/drm/nouveau/nv04_surface.c b/src/gallium/winsys/drm/nouveau/common/nv04_surface.c index 68338eb814..e9a8a2ac1c 100644 --- a/src/gallium/winsys/drm/nouveau/nv04_surface.c +++ b/src/gallium/winsys/drm/nouveau/common/nv04_surface.c @@ -82,6 +82,37 @@ nv04_scaled_image_format(enum pipe_format format) } } +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y) +{ + unsigned u = (x & 0x001) << 0 | + (x & 0x002) << 1 | + (x & 0x004) << 2 | + (x & 0x008) << 3 | + (x & 0x010) << 4 | + (x & 0x020) << 5 | + (x & 0x040) << 6 | + (x & 0x080) << 7 | + (x & 0x100) << 8 | + (x & 0x200) << 9 | + (x & 0x400) << 10 | + (x & 0x800) << 11; + + unsigned v = (y & 0x001) << 1 | + (y & 0x002) << 2 | + (y & 0x004) << 3 | + (y & 0x008) << 4 | + (y & 0x010) << 5 | + (y & 0x020) << 6 | + (y & 0x040) << 7 | + (y & 0x080) << 8 | + (y & 0x100) << 9 | + (y & 0x200) << 10 | + (y & 0x400) << 11 | + (y & 0x800) << 12; + return v | u; +} + static void nv04_surface_copy_swizzle(struct nouveau_context *nv, unsigned dx, unsigned dy, unsigned sx, unsigned sy, unsigned w, unsigned h) @@ -90,19 +121,23 @@ nv04_surface_copy_swizzle(struct nouveau_context *nv, unsigned dx, unsigned dy, struct pipe_surface *dst = nv->surf_dst; struct pipe_surface *src = nv->surf_src; + const unsigned max_w = 1024; + const unsigned max_h = 1024; + const unsigned sub_w = w > max_w ? max_w : w; + const unsigned sub_h = h > max_h ? max_h : h; + unsigned cx = 0; + unsigned cy = 0; + /* POT or GTFO */ assert(!(w & (w - 1)) && !(h & (h - 1))); BEGIN_RING(chan, nv->nvc->NvSwzSurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, nv->nvc->NvSwzSurf, NV04_SWIZZLED_SURFACE_FORMAT, 2); + BEGIN_RING(chan, nv->nvc->NvSwzSurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); OUT_RING (chan, nv04_surface_format(dst->format) | log2i(w) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | log2i(h) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); - OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, nv->nvc->NvSIFM, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); OUT_RELOCo(chan, nouveau_buffer(src->buffer)->bo, @@ -110,24 +145,35 @@ nv04_surface_copy_swizzle(struct nouveau_context *nv, unsigned dx, unsigned dy, BEGIN_RING(chan, nv->nvc->NvSIFM, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); OUT_RING (chan, nv->nvc->NvSwzSurf->handle); - BEGIN_RING(chan, nv->nvc->NvSIFM, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); - OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); - OUT_RING (chan, nv04_scaled_image_format(src->format)); - OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, 0); - OUT_RING (chan, h << 16 | w); - OUT_RING (chan, 0); - OUT_RING (chan, h << 16 | w); - OUT_RING (chan, 1 << 20); - OUT_RING (chan, 1 << 20); - BEGIN_RING(chan, nv->nvc->NvSIFM, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); - OUT_RING (chan, h << 16 | w); - OUT_RING (chan, src->stride | - NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | - NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); - OUT_RELOCl(chan, nouveau_buffer(src->buffer)->bo, src->offset, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RING (chan, 0); + for (cy = 0; cy < h; cy += sub_h) { + for (cx = 0; cx < w; cx += sub_w) { + BEGIN_RING(chan, nv->nvc->NvSwzSurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); + OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, + dst->offset + nv04_swizzle_bits(cx, cy) * dst->block.size, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, nv->nvc->NvSIFM, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); + OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); + OUT_RING (chan, nv04_scaled_image_format(src->format)); + OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); + OUT_RING (chan, 0); + OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, 0); + OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, 1 << 20); + OUT_RING (chan, 1 << 20); + + BEGIN_RING(chan, nv->nvc->NvSIFM, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); + OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, src->stride | + NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | + NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); + OUT_RELOCl(chan, nouveau_buffer(src->buffer)->bo, + src->offset + cy * src->stride + cx * src->block.size, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, 0); + } + } } static void diff --git a/src/gallium/winsys/drm/nouveau/nv50_surface.c b/src/gallium/winsys/drm/nouveau/common/nv50_surface.c index c8ab7f690f..c8ab7f690f 100644 --- a/src/gallium/winsys/drm/nouveau/nv50_surface.c +++ b/src/gallium/winsys/drm/nouveau/common/nv50_surface.c diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile new file mode 100644 index 0000000000..e129e42e97 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/Makefile @@ -0,0 +1,31 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = nouveau_dri.so + +MINIGLX_SOURCES = + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/nv04/libnv04.a \ + $(TOP)/src/gallium/drivers/nv10/libnv10.a \ + $(TOP)/src/gallium/drivers/nv20/libnv20.a \ + $(TOP)/src/gallium/drivers/nv30/libnv30.a \ + $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a + +DRIVER_SOURCES = \ + nouveau_context_dri.c \ + nouveau_screen_dri.c \ + nouveau_swapbuffers.c \ + ../common/libnouveaudrm.a + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c new file mode 100644 index 0000000000..006978b182 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c @@ -0,0 +1,124 @@ +#include <main/glheader.h> +#include <glapi/glthread.h> +#include <GL/internal/glcore.h> +#include <utils.h> + +#include <state_tracker/st_public.h> +#include <state_tracker/st_context.h> +#include <pipe/p_defines.h> +#include <pipe/p_context.h> +#include <pipe/p_screen.h> + +#include "../common/nouveau_winsys_pipe.h" +#include "../common/nouveau_dri.h" +#include "../common/nouveau_local.h" +#include "nouveau_context_dri.h" +#include "nouveau_screen_dri.h" + +#ifdef DEBUG +static const struct dri_debug_control debug_control[] = { + { "bo", DEBUG_BO }, + { NULL, 0 } +}; +int __nouveau_debug = 0; +#endif + +GLboolean +nouveau_context_create(const __GLcontextModes *glVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv; + struct nouveau_screen_dri *nv_screen = driScrnPriv->private; + struct nouveau_context_dri *nv = CALLOC_STRUCT(nouveau_context_dri); + struct st_context *st_share = NULL; + struct nouveau_context_dri *nv_share = NULL; + struct pipe_context *pipe; + + if (sharedContextPrivate) { + st_share = ((struct nouveau_context_dri *)sharedContextPrivate)->st; + nv_share = st_share->pipe->priv; + } + + if (nouveau_context_init(&nv_screen->base, driContextPriv->hHWContext, + (drmLock *)&driScrnPriv->pSAREA->lock, + nv_share, &nv->base)) { + return GL_FALSE; + } + + pipe = nv->base.nvc->pctx[nv->base.pctx_id]; + driContextPriv->driverPrivate = (void *)nv; + //nv->nv_screen = nv_screen; + nv->dri_screen = driScrnPriv; + + driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, + nv->dri_screen->myNum, "nouveau"); +#ifdef DEBUG + __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), + debug_control); +#endif + + nv->st = st_create_context(pipe, glVis, st_share); + return GL_TRUE; +} + +void +nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) +{ + struct nouveau_context_dri *nv = driContextPriv->driverPrivate; + + assert(nv); + + st_finish(nv->st); + st_destroy_context(nv->st); + + nouveau_context_cleanup(&nv->base); + + FREE(nv); +} + +GLboolean +nouveau_context_bind(__DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv) +{ + struct nouveau_context_dri *nv; + struct nouveau_framebuffer *draw, *read; + + if (!driContextPriv) { + st_make_current(NULL, NULL, NULL); + return GL_TRUE; + } + + nv = driContextPriv->driverPrivate; + draw = driDrawPriv->driverPrivate; + read = driReadPriv->driverPrivate; + + st_make_current(nv->st, draw->stfb, read->stfb); + + if ((nv->dri_drawable != driDrawPriv) || + (nv->last_stamp != driDrawPriv->lastStamp)) { + nv->dri_drawable = driDrawPriv; + st_resize_framebuffer(draw->stfb, driDrawPriv->w, + driDrawPriv->h); + nv->last_stamp = driDrawPriv->lastStamp; + } + + if (driDrawPriv != driReadPriv) { + st_resize_framebuffer(read->stfb, driReadPriv->w, + driReadPriv->h); + } + + return GL_TRUE; +} + +GLboolean +nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) +{ + struct nouveau_context_dri *nv = driContextPriv->driverPrivate; + (void)nv; + + st_flush(nv->st, 0, NULL); + return GL_TRUE; +} + diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h new file mode 100644 index 0000000000..8257790d47 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h @@ -0,0 +1,49 @@ +#ifndef __NOUVEAU_CONTEXT_DRI_H__ +#define __NOUVEAU_CONTEXT_DRI_H__ + +#include <dri_util.h> +#include <xmlconfig.h> +#include <nouveau/nouveau_winsys.h> +#include "../common/nouveau_context.h" +#include "../common/nouveau_drmif.h" +#include "../common/nouveau_dma.h" + +struct nouveau_framebuffer { + struct st_framebuffer *stfb; +}; + +struct nouveau_context_dri { + struct nouveau_context base; + struct st_context *st; + + /* DRI stuff */ + __DRIscreenPrivate *dri_screen; + __DRIdrawablePrivate *dri_drawable; + unsigned int last_stamp; + driOptionCache dri_option_cache; + drm_context_t drm_context; + drmLock drm_lock; +}; + +extern GLboolean nouveau_context_create(const __GLcontextModes *, + __DRIcontextPrivate *, void *); +extern void nouveau_context_destroy(__DRIcontextPrivate *); +extern GLboolean nouveau_context_bind(__DRIcontextPrivate *, + __DRIdrawablePrivate *draw, + __DRIdrawablePrivate *read); +extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *); + +#ifdef DEBUG +extern int __nouveau_debug; + +#define DEBUG_BO (1 << 0) + +#define DBG(flag, ...) do { \ + if (__nouveau_debug & (DEBUG_##flag)) \ + NOUVEAU_ERR(__VA_ARGS__); \ +} while(0) +#else +#define DBG(flag, ...) +#endif + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_screen.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c index c6d0c53588..1d7c92376f 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_screen.c +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c @@ -1,16 +1,15 @@ -#include "utils.h" -#include "vblank.h" -#include "xmlpool.h" - -#include "pipe/p_context.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_cb_fbo.h" - -#include "nouveau_context.h" -#include "nouveau_drm.h" -#include "nouveau_dri.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" +#include <utils.h> +#include <vblank.h> +#include <xmlpool.h> + +#include <pipe/p_context.h> +#include <state_tracker/st_public.h> +#include <state_tracker/st_cb_fbo.h> +#include <nouveau_drm.h> +#include "../common/nouveau_dri.h" +#include "../common/nouveau_local.h" +#include "nouveau_context_dri.h" +#include "nouveau_screen_dri.h" #include "nouveau_swapbuffers.h" #if NOUVEAU_DRM_HEADER_PATCHLEVEL != 11 @@ -183,13 +182,12 @@ static const __DRIconfig ** nouveau_screen_create(__DRIscreenPrivate *psp) { struct nouveau_dri *nv_dri = psp->pDevPriv; - struct nouveau_screen *nv_screen; + struct nouveau_screen_dri *nv_screen; static const __DRIversion ddx_expected = { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; - int ret; if (!driCheckDriDdxDrmVersions2("nouveau", &psp->dri_version, &dri_expected, @@ -209,28 +207,23 @@ nouveau_screen_create(__DRIscreenPrivate *psp) if (psp->devPrivSize != sizeof(struct nouveau_dri)) { NOUVEAU_ERR("DRI struct mismatch between DDX/DRI\n"); - return GL_FALSE; + return NULL; } - nv_screen = CALLOC_STRUCT(nouveau_screen); + nv_screen = CALLOC_STRUCT(nouveau_screen_dri); if (!nv_screen) - return GL_FALSE; - nv_screen->driScrnPriv = psp; - psp->private = (void *)nv_screen; + return NULL; driParseOptionInfo(&nv_screen->option_cache, __driConfigOptions, __driNConfigOptions); - if ((ret = nouveau_device_open_existing(&nv_screen->device, 0, - psp->fd, 0))) { - NOUVEAU_ERR("Failed opening nouveau device: %d\n", ret); - return GL_FALSE; + if (nouveau_screen_init(nv_dri, psp->fd, &nv_screen->base)) { + FREE(nv_screen); + return NULL; } - nv_screen->front_offset = nv_dri->front_offset; - nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); - nv_screen->front_cpp = nv_dri->bpp / 8; - nv_screen->front_height = nv_dri->height; + nv_screen->driScrnPriv = psp; + psp->private = (void *)nv_screen; return (const __DRIconfig **) nouveau_fill_in_modes(psp, nv_dri->bpp, @@ -241,9 +234,10 @@ nouveau_screen_create(__DRIscreenPrivate *psp) static void nouveau_screen_destroy(__DRIscreenPrivate *driScrnPriv) { - struct nouveau_screen *nv_screen = driScrnPriv->private; + struct nouveau_screen_dri *nv_screen = driScrnPriv->private; driScrnPriv->private = NULL; + nouveau_screen_cleanup(&nv_screen->base); FREE(nv_screen); } diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h new file mode 100644 index 0000000000..1498087819 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h @@ -0,0 +1,13 @@ +#ifndef __NOUVEAU_SCREEN_DRI_H__ +#define __NOUVEAU_SCREEN_DRI_H__ + +#include "../common/nouveau_screen.h" +#include "xmlconfig.h" + +struct nouveau_screen_dri { + struct nouveau_screen base; + __DRIscreenPrivate *driScrnPriv; + driOptionCache option_cache; +}; + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c index 70e0104e83..38461b2b0c 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.c +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c @@ -1,34 +1,34 @@ -#include "main/glheader.h" -#include "glapi/glthread.h" +#include <main/glheader.h> +#include <glapi/glthread.h> #include <GL/internal/glcore.h> -#include "pipe/p_context.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_context.h" -#include "state_tracker/st_cb_fbo.h" +#include <pipe/p_context.h> +#include <state_tracker/st_public.h> +#include <state_tracker/st_context.h> +#include <state_tracker/st_cb_fbo.h> -#include "nouveau_context.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" +#include "../common/nouveau_local.h" +#include "nouveau_context_dri.h" +#include "nouveau_screen_dri.h" #include "nouveau_swapbuffers.h" void nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, const drm_clip_rect_t *rect) { - struct nouveau_context *nv = dPriv->driContextPriv->driverPrivate; + struct nouveau_context_dri *nv = dPriv->driContextPriv->driverPrivate; drm_clip_rect_t *pbox; int nbox, i; - LOCK_HARDWARE(nv); + LOCK_HARDWARE(&nv->base); if (!dPriv->numClipRects) { - UNLOCK_HARDWARE(nv); + UNLOCK_HARDWARE(&nv->base); return; } pbox = dPriv->pClipRects; nbox = dPriv->numClipRects; - nv->surface_copy_prep(nv, nv->frontbuffer, surf); + nv->base.surface_copy_prep(&nv->base, nv->base.frontbuffer, surf); for (i = 0; i < nbox; i++, pbox++) { int sx, sy, dx, dy, w, h; @@ -39,11 +39,11 @@ nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, w = pbox->x2 - pbox->x1; h = pbox->y2 - pbox->y1; - nv->surface_copy(nv, dx, dy, sx, sy, w, h); + nv->base.surface_copy(&nv->base, dx, dy, sx, sy, w, h); } - FIRE_RING(nv->nvc->channel); - UNLOCK_HARDWARE(nv); + FIRE_RING(nv->base.nvc->channel); + UNLOCK_HARDWARE(&nv->base); if (nv->last_stamp != dPriv->lastStamp) { struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; @@ -84,3 +84,29 @@ nouveau_swap_buffers(__DRIdrawablePrivate *dPriv) } } +void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private) +{ + struct nouveau_context_dri *nv = context_private; + __DRIdrawablePrivate *dPriv = nv->dri_drawable; + + nouveau_copy_buffer(dPriv, surf, NULL); +} + +void +nouveau_contended_lock(struct nouveau_context *nv) +{ + struct nouveau_context_dri *nv_sub = (struct nouveau_context_dri*)nv; + __DRIdrawablePrivate *dPriv = nv_sub->dri_drawable; + __DRIscreenPrivate *sPriv = nv_sub->dri_screen; + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dPriv) + DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h index 825d3da6da..825d3da6da 100644 --- a/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.h +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h diff --git a/src/gallium/winsys/drm/nouveau/nouveau_screen.h b/src/gallium/winsys/drm/nouveau/nouveau_screen.h deleted file mode 100644 index 388d6be9bb..0000000000 --- a/src/gallium/winsys/drm/nouveau/nouveau_screen.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef __NOUVEAU_SCREEN_H__ -#define __NOUVEAU_SCREEN_H__ - -#include "xmlconfig.h" - -struct nouveau_screen { - __DRIscreenPrivate *driScrnPriv; - driOptionCache option_cache; - - struct nouveau_device *device; - - uint32_t front_offset; - uint32_t front_pitch; - uint32_t front_cpp; - uint32_t front_height; - - void *nvc; -}; - -#endif diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile index ff43327778..22d925b643 100644 --- a/src/gallium/winsys/g3dvl/nouveau/Makefile +++ b/src/gallium/winsys/g3dvl/nouveau/Makefile @@ -3,35 +3,33 @@ GALLIUMDIR = ../../.. DRMDIR ?= /usr DRIDIR = ../../../../driclient -OBJECTS = nouveau_bo.o nouveau_fence.o nouveau_swapbuffers.o nouveau_channel.o \ - nouveau_grobj.o nouveau_context.o nouveau_winsys.o nouveau_lock.o \ - nouveau_winsys_pipe.o nouveau_device.o nouveau_notifier.o nouveau_dma.o \ - nouveau_pushbuf.o nouveau_resource.o nouveau_screen.o nv04_surface.o \ - nv50_surface.o #nouveau_winsys_softpipe.o - -CFLAGS += -g -Wall -fPIC \ - -I${GALLIUMDIR}/include \ - -I${GALLIUMDIR}/winsys/g3dvl \ - -I${DRMDIR}/include \ - -I${DRMDIR}/include/drm \ - -I${GALLIUMDIR}/drivers \ - -I${GALLIUMDIR}/auxiliary \ +OBJECTS = nouveau_screen_vl.o nouveau_context_vl.o nouveau_swapbuffers.o + +CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/winsys/g3dvl \ + -I${GALLIUMDIR}/winsys/drm/nouveau \ + -I${DRMDIR}/include \ + -I${DRMDIR}/include/drm \ + -I${GALLIUMDIR}/drivers \ + -I${GALLIUMDIR}/auxiliary \ -I${DRIDIR}/include -LDFLAGS += -L${DRMDIR}/lib \ - -L${DRIDIR}/lib \ - -L${GALLIUMDIR}/auxiliary/draw \ - -L${GALLIUMDIR}/auxiliary/tgsi \ - -L${GALLIUMDIR}/auxiliary/translate \ - -L${GALLIUMDIR}/auxiliary/rtasm \ - -L${GALLIUMDIR}/auxiliary/cso_cache \ - -L${GALLIUMDIR}/drivers/nv10 \ - -L${GALLIUMDIR}/drivers/nv20 \ - -L${GALLIUMDIR}/drivers/nv30 \ - -L${GALLIUMDIR}/drivers/nv40 \ +LDFLAGS += -L${DRMDIR}/lib \ + -L${DRIDIR}/lib \ + -L${GALLIUMDIR}/winsys/drm/nouveau/common \ + -L${GALLIUMDIR}/auxiliary/draw \ + -L${GALLIUMDIR}/auxiliary/tgsi \ + -L${GALLIUMDIR}/auxiliary/translate \ + -L${GALLIUMDIR}/auxiliary/rtasm \ + -L${GALLIUMDIR}/auxiliary/cso_cache \ + -L${GALLIUMDIR}/drivers/nv10 \ + -L${GALLIUMDIR}/drivers/nv20 \ + -L${GALLIUMDIR}/drivers/nv30 \ + -L${GALLIUMDIR}/drivers/nv40 \ -L${GALLIUMDIR}/drivers/nv50 -LIBS += -ldriclient -ldrm -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm +LIBS += -lnouveaudrm -ldriclient -ldrm -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm ############################################# diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_context.c deleted file mode 100644 index 06a61fcda3..0000000000 --- a/src/gallium/winsys/g3dvl/nouveau/nouveau_context.c +++ /dev/null @@ -1,370 +0,0 @@ -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_screen.h" -#include "util/u_memory.h" - -#include "nouveau_context.h" -#include "nouveau_dri.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_winsys_pipe.h" - -/* -#ifdef DEBUG -static const struct dri_debug_control debug_control[] = { - { "bo", DEBUG_BO }, - { NULL, 0 } -}; -int __nouveau_debug = 0; -#endif -*/ - -/* - * TODO: Re-examine dri_screen, dri_context, nouveau_screen, nouveau_context - * relationships, seems like there is a lot of room for simplification there. - */ - -static void -nouveau_channel_context_destroy(struct nouveau_channel_context *nvc) -{ - nouveau_grobj_free(&nvc->NvCtxSurf2D); - nouveau_grobj_free(&nvc->NvImageBlit); - nouveau_grobj_free(&nvc->NvGdiRect); - nouveau_grobj_free(&nvc->NvM2MF); - nouveau_grobj_free(&nvc->Nv2D); - nouveau_grobj_free(&nvc->NvSwzSurf); - nouveau_grobj_free(&nvc->NvSIFM); - - nouveau_notifier_free(&nvc->sync_notifier); - - nouveau_channel_free(&nvc->channel); - - FREE(nvc); -} - -static struct nouveau_channel_context * -nouveau_channel_context_create(struct nouveau_device *dev) -{ - struct nouveau_channel_context *nvc; - int ret; - - nvc = CALLOC_STRUCT(nouveau_channel_context); - if (!nvc) - return NULL; - - if ((ret = nouveau_channel_alloc(dev, 0x8003d001, 0x8003d002, - &nvc->channel))) { - NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); - nouveau_channel_context_destroy(nvc); - return NULL; - } - - nvc->next_handle = 0x80000000; - - if ((ret = nouveau_notifier_alloc(nvc->channel, nvc->next_handle++, 1, - &nvc->sync_notifier))) { - NOUVEAU_ERR("Error creating channel sync notifier: %d\n", ret); - nouveau_channel_context_destroy(nvc); - return NULL; - } - - switch (dev->chipset & 0xf0) { - case 0x50: - case 0x80: - case 0x90: - ret = nouveau_surface_channel_create_nv50(nvc); - break; - default: - ret = nouveau_surface_channel_create_nv04(nvc); - break; - } - - if (ret) { - NOUVEAU_ERR("Error initialising surface objects: %d\n", ret); - nouveau_channel_context_destroy(nvc); - return NULL; - } - - return nvc; -} - -int -nouveau_context_create(dri_context_t *dri_context) -{ - dri_screen_t *dri_screen = dri_context->dri_screen; - struct nouveau_screen *nv_screen = dri_screen->private; - struct nouveau_context *nv = CALLOC_STRUCT(nouveau_context); - struct pipe_context *pipe = NULL; - struct nouveau_channel_context *nvc = NULL; - struct nouveau_device *dev = nv_screen->device; - int i; - - switch (dev->chipset & 0xf0) { - case 0x10: - case 0x20: - /* NV10 */ - case 0x30: - /* NV30 */ - case 0x40: - case 0x60: - /* NV40 */ - case 0x50: - case 0x80: - case 0x90: - /* G80 */ - break; - default: - NOUVEAU_ERR("Unsupported chipset: NV%02x\n", dev->chipset); - return 1; - } - - dri_context->private = (void*)nv; - nv->dri_context = dri_context; - nv->nv_screen = nv_screen; - - { - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - nvdev->ctx = dri_context->drm_context; - nvdev->lock = (drmLock*)&dri_screen->sarea->lock; - } - - /* - driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, - nv->dri_screen->myNum, "nouveau"); -#ifdef DEBUG - __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), - debug_control); -#endif - */ - - /*XXX: Hack up a fake region and buffer object for front buffer. - * This will go away with TTM, replaced with a simple reference - * of the front buffer handle passed to us by the DDX. - */ - { - struct pipe_surface *fb_surf; - struct nouveau_pipe_buffer *fb_buf; - struct nouveau_bo_priv *fb_bo; - - fb_bo = calloc(1, sizeof(struct nouveau_bo_priv)); - fb_bo->drm.offset = nv_screen->front_offset; - fb_bo->drm.flags = NOUVEAU_MEM_FB; - fb_bo->drm.size = nv_screen->front_pitch * - nv_screen->front_height; - fb_bo->refcount = 1; - fb_bo->base.flags = NOUVEAU_BO_PIN | NOUVEAU_BO_VRAM; - fb_bo->base.offset = fb_bo->drm.offset; - fb_bo->base.handle = (unsigned long)fb_bo; - fb_bo->base.size = fb_bo->drm.size; - fb_bo->base.device = nv_screen->device; - - fb_buf = calloc(1, sizeof(struct nouveau_pipe_buffer)); - fb_buf->bo = &fb_bo->base; - - fb_surf = calloc(1, sizeof(struct pipe_surface)); - if (nv_screen->front_cpp == 2) - fb_surf->format = PIPE_FORMAT_R5G6B5_UNORM; - else - fb_surf->format = PIPE_FORMAT_A8R8G8B8_UNORM; - pf_get_block(fb_surf->format, &fb_surf->block); - fb_surf->width = nv_screen->front_pitch / nv_screen->front_cpp; - fb_surf->height = nv_screen->front_height; - fb_surf->stride = fb_surf->width * fb_surf->block.size; - fb_surf->refcount = 1; - fb_surf->buffer = &fb_buf->base; - - nv->frontbuffer = fb_surf; - } - - nvc = nv_screen->nvc; - - if (!nvc) { - nvc = nouveau_channel_context_create(dev); - if (!nvc) { - NOUVEAU_ERR("Failed initialising GPU context\n"); - return 1; - } - nv_screen->nvc = nvc; - } - - nvc->refcount++; - nv->nvc = nvc; - - /* Find a free slot for a pipe context, allocate a new one if needed */ - nv->pctx_id = -1; - for (i = 0; i < nvc->nr_pctx; i++) { - if (nvc->pctx[i] == NULL) { - nv->pctx_id = i; - break; - } - } - - if (nv->pctx_id < 0) { - nv->pctx_id = nvc->nr_pctx++; - nvc->pctx = - realloc(nvc->pctx, - sizeof(struct pipe_context *) * nvc->nr_pctx); - } - - /* Create pipe */ - switch (dev->chipset & 0xf0) { - case 0x50: - case 0x80: - case 0x90: - if (nouveau_surface_init_nv50(nv)) - return 1; - break; - default: - if (nouveau_surface_init_nv04(nv)) - return 1; - break; - } - - if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { - struct pipe_screen *pscreen; - - pipe = nouveau_pipe_create(nv); - if (!pipe) - NOUVEAU_ERR("Couldn't create hw pipe\n"); - pscreen = nvc->pscreen; - - nv->cap.hw_vertex_buffer = - pscreen->get_param(pscreen, NOUVEAU_CAP_HW_VTXBUF); - nv->cap.hw_index_buffer = - pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF); - } - - /* XXX: nouveau_winsys_softpipe needs a mesa header removed before we can compile it. */ - /* - if (!pipe) { - NOUVEAU_MSG("Using softpipe\n"); - pipe = nouveau_create_softpipe(nv); - if (!pipe) { - NOUVEAU_ERR("Error creating pipe, bailing\n"); - return 1; - } - } - */ - if (!pipe) { - NOUVEAU_ERR("Error creating pipe, bailing\n"); - return 1; - } - - pipe->priv = nv; - - return 0; -} - -void -nouveau_context_destroy(dri_context_t *dri_context) -{ - struct nouveau_context *nv = dri_context->private; - struct nouveau_channel_context *nvc = nv->nvc; - - assert(nv); - - if (nv->pctx_id >= 0) { - nvc->pctx[nv->pctx_id] = NULL; - if (--nvc->refcount <= 0) { - nouveau_channel_context_destroy(nvc); - nv->nv_screen->nvc = NULL; - } - } - - free(nv); -} - -int -nouveau_context_bind(struct nouveau_context *nv, dri_drawable_t *dri_drawable) -{ - assert(nv); - assert(dri_drawable); - - if (nv->dri_drawable != dri_drawable) - { - nv->dri_drawable = dri_drawable; - dri_drawable->private = nv; - } - - return 0; -} - -int -nouveau_context_unbind(struct nouveau_context *nv) -{ - assert(nv); - - nv->dri_drawable = NULL; - - return 0; -} - -/* Show starts here */ - -int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable) -{ - struct nouveau_context *nv; - dri_drawable_t *dri_drawable; - - nv = pipe->priv; - - driCreateDrawable(nv->nv_screen->dri_screen, drawable, &dri_drawable); - - nouveau_context_bind(nv, dri_drawable); - - return 0; -} - -int unbind_pipe_drawable(struct pipe_context *pipe) -{ - nouveau_context_unbind(pipe->priv); - - return 0; -} - -struct pipe_context* create_pipe_context(Display *display, int screen) -{ - dri_screen_t *dri_screen; - dri_framebuffer_t dri_framebuf; - dri_context_t *dri_context; - struct nouveau_context *nv; - - driCreateScreen(display, screen, &dri_screen, &dri_framebuf); - driCreateContext(dri_screen, XDefaultVisual(display, screen), &dri_context); - - nouveau_screen_create(dri_screen, &dri_framebuf); - nouveau_context_create(dri_context); - - nv = dri_context->private; - - return nv->nvc->pctx[nv->pctx_id]; -} - -int destroy_pipe_context(struct pipe_context *pipe) -{ - struct pipe_screen *screen; - struct pipe_winsys *winsys; - struct nouveau_context *nv; - dri_screen_t *dri_screen; - dri_context_t *dri_context; - - assert(pipe); - - screen = pipe->screen; - winsys = pipe->winsys; - nv = pipe->priv; - dri_context = nv->dri_context; - dri_screen = dri_context->dri_screen; - - pipe->destroy(pipe); - screen->destroy(screen); - free(winsys); - - nouveau_context_destroy(dri_context); - nouveau_screen_destroy(dri_screen); - driDestroyContext(dri_context); - driDestroyScreen(dri_screen); - - return 0; -} diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_context.h deleted file mode 100644 index 395a3ab790..0000000000 --- a/src/gallium/winsys/g3dvl/nouveau/nouveau_context.h +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef __NOUVEAU_CONTEXT_H__ -#define __NOUVEAU_CONTEXT_H__ - -/*#include "xmlconfig.h"*/ - -#include <driclient.h> -#include "nouveau/nouveau_winsys.h" -#include "nouveau_drmif.h" -#include "nouveau_dma.h" - -struct nouveau_channel_context { - struct pipe_screen *pscreen; - int refcount; - - unsigned cur_pctx; - unsigned nr_pctx; - struct pipe_context **pctx; - - struct nouveau_channel *channel; - - struct nouveau_notifier *sync_notifier; - - /* Common */ - struct nouveau_grobj *NvM2MF; - /* NV04-NV40 */ - struct nouveau_grobj *NvCtxSurf2D; - struct nouveau_grobj *NvSwzSurf; - struct nouveau_grobj *NvImageBlit; - struct nouveau_grobj *NvGdiRect; - struct nouveau_grobj *NvSIFM; - /* G80 */ - struct nouveau_grobj *Nv2D; - - uint32_t next_handle; - uint32_t next_subchannel; - uint32_t next_sequence; -}; - -struct nouveau_context { - /* DRI stuff */ - dri_context_t *dri_context; - dri_drawable_t *dri_drawable; - unsigned int last_stamp; - /*driOptionCache dri_option_cache;*/ - drm_context_t drm_context; - drmLock drm_lock; - int locked; - struct nouveau_screen *nv_screen; - struct pipe_surface *frontbuffer; - - struct { - int hw_vertex_buffer; - int hw_index_buffer; - } cap; - - /* Hardware context */ - struct nouveau_channel_context *nvc; - int pctx_id; - - /* pipe_surface accel */ - struct pipe_surface *surf_src, *surf_dst; - unsigned surf_src_offset, surf_dst_offset; - - int (*surface_copy_prep)(struct nouveau_context *, - struct pipe_surface *dst, - struct pipe_surface *src); - void (*surface_copy)(struct nouveau_context *, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h); - void (*surface_copy_done)(struct nouveau_context *); - int (*surface_fill)(struct nouveau_context *, struct pipe_surface *, - unsigned, unsigned, unsigned, unsigned, unsigned); -}; - -extern int nouveau_context_create(dri_context_t *); -extern void nouveau_context_destroy(dri_context_t *); -extern int nouveau_context_bind(struct nouveau_context *, dri_drawable_t *); -extern int nouveau_context_unbind(struct nouveau_context *); - -#ifdef DEBUG -extern int __nouveau_debug; - -#define DEBUG_BO (1 << 0) - -#define DBG(flag, ...) do { \ - if (__nouveau_debug & (DEBUG_##flag)) \ - NOUVEAU_ERR(__VA_ARGS__); \ -} while(0) -#else -#define DBG(flag, ...) -#endif - -extern void LOCK_HARDWARE(struct nouveau_context *); -extern void UNLOCK_HARDWARE(struct nouveau_context *); - -extern int -nouveau_surface_channel_create_nv04(struct nouveau_channel_context *); -extern int -nouveau_surface_channel_create_nv50(struct nouveau_channel_context *); -extern int nouveau_surface_init_nv04(struct nouveau_context *); -extern int nouveau_surface_init_nv50(struct nouveau_context *); - -extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int); -extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *); - -#endif diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c new file mode 100644 index 0000000000..dfc4905bc0 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c @@ -0,0 +1,172 @@ +#include "nouveau_context_vl.h" +#include <pipe/p_defines.h> +#include <pipe/p_context.h> +#include <pipe/p_screen.h> +#include <util/u_memory.h> +#include <common/nouveau_dri.h> +#include <common/nouveau_local.h> +#include <common/nouveau_winsys_pipe.h> +#include "nouveau_screen_vl.h" + +/* +#ifdef DEBUG +static const struct dri_debug_control debug_control[] = { + { "bo", DEBUG_BO }, + { NULL, 0 } +}; +int __nouveau_debug = 0; +#endif +*/ + +int +nouveau_context_create(dri_context_t *dri_context) +{ + dri_screen_t *dri_screen; + struct nouveau_screen_vl *nv_screen; + struct nouveau_context_vl *nv; + + assert (dri_context); + + dri_screen = dri_context->dri_screen; + nv_screen = dri_screen->private; + nv = CALLOC_STRUCT(nouveau_context_vl); + + if (!nv) + return 1; + + if (nouveau_context_init(&nv_screen->base, dri_context->drm_context, + (drmLock*)&dri_screen->sarea->lock, NULL, &nv->base)) + { + FREE(nv); + return 1; + } + + dri_context->private = (void*)nv; + nv->dri_context = dri_context; + nv->nv_screen = nv_screen; + + /* + driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, + nv->dri_screen->myNum, "nouveau"); +#ifdef DEBUG + __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), + debug_control); +#endif + */ + + nv->base.nvc->pctx[nv->base.pctx_id]->priv = nv; + + return 0; +} + +void +nouveau_context_destroy(dri_context_t *dri_context) +{ + struct nouveau_context_vl *nv = dri_context->private; + + assert(dri_context); + + nouveau_context_cleanup(&nv->base); + + FREE(nv); +} + +int +nouveau_context_bind(struct nouveau_context_vl *nv, dri_drawable_t *dri_drawable) +{ + assert(nv); + assert(dri_drawable); + + if (nv->dri_drawable != dri_drawable) + { + nv->dri_drawable = dri_drawable; + dri_drawable->private = nv; + } + + return 0; +} + +int +nouveau_context_unbind(struct nouveau_context_vl *nv) +{ + assert(nv); + + nv->dri_drawable = NULL; + + return 0; +} + +/* Show starts here */ + +int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable) +{ + struct nouveau_context_vl *nv; + dri_drawable_t *dri_drawable; + + assert(pipe); + + nv = pipe->priv; + + driCreateDrawable(nv->nv_screen->dri_screen, drawable, &dri_drawable); + + nouveau_context_bind(nv, dri_drawable); + + return 0; +} + +int unbind_pipe_drawable(struct pipe_context *pipe) +{ + assert (pipe); + + nouveau_context_unbind(pipe->priv); + + return 0; +} + +struct pipe_context* create_pipe_context(Display *display, int screen) +{ + dri_screen_t *dri_screen; + dri_framebuffer_t dri_framebuf; + dri_context_t *dri_context; + struct nouveau_context_vl *nv; + + assert(display); + + driCreateScreen(display, screen, &dri_screen, &dri_framebuf); + driCreateContext(dri_screen, XDefaultVisual(display, screen), &dri_context); + + nouveau_screen_create(dri_screen, &dri_framebuf); + nouveau_context_create(dri_context); + + nv = dri_context->private; + + return nv->base.nvc->pctx[nv->base.pctx_id]; +} + +int destroy_pipe_context(struct pipe_context *pipe) +{ + struct pipe_screen *screen; + struct pipe_winsys *winsys; + struct nouveau_context_vl *nv; + dri_screen_t *dri_screen; + dri_context_t *dri_context; + + assert(pipe); + + screen = pipe->screen; + winsys = pipe->winsys; + nv = pipe->priv; + dri_context = nv->dri_context; + dri_screen = dri_context->dri_screen; + + pipe->destroy(pipe); + screen->destroy(screen); + FREE(winsys); + + nouveau_context_destroy(dri_context); + nouveau_screen_destroy(dri_screen); + driDestroyContext(dri_context); + driDestroyScreen(dri_screen); + + return 0; +} diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h new file mode 100644 index 0000000000..1115c3130c --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h @@ -0,0 +1,39 @@ +#ifndef __NOUVEAU_CONTEXT_VL_H__ +#define __NOUVEAU_CONTEXT_VL_H__ + +#include <driclient.h> +#include <nouveau/nouveau_winsys.h> +#include <common/nouveau_context.h> + +/*#include "xmlconfig.h"*/ + +struct nouveau_context_vl { + struct nouveau_context base; + struct nouveau_screen_vl *nv_screen; + dri_context_t *dri_context; + dri_drawable_t *dri_drawable; + unsigned int last_stamp; + /*driOptionCache dri_option_cache;*/ + drm_context_t drm_context; + drmLock drm_lock; +}; + +extern int nouveau_context_create(dri_context_t *); +extern void nouveau_context_destroy(dri_context_t *); +extern int nouveau_context_bind(struct nouveau_context_vl *, dri_drawable_t *); +extern int nouveau_context_unbind(struct nouveau_context_vl *); + +#ifdef DEBUG +extern int __nouveau_debug; + +#define DEBUG_BO (1 << 0) + +#define DBG(flag, ...) do { \ + if (__nouveau_debug & (DEBUG_##flag)) \ + NOUVEAU_ERR(__VA_ARGS__); \ +} while(0) +#else +#define DBG(flag, ...) +#endif + +#endif diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c index 375634bd05..f292586974 100644 --- a/src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c @@ -25,12 +25,12 @@ * **************************************************************************/ -#include <pthread.h> +#include <pipe/p_thread.h> #include <driclient.h> #include "nouveau_context.h" #include "nouveau_screen.h" -static pthread_mutex_t lockMutex = PTHREAD_MUTEX_INITIALIZER; +pipe_static_mutex(lockMutex); static void nouveau_contended_lock(struct nouveau_context *nv, unsigned int flags) @@ -62,7 +62,7 @@ LOCK_HARDWARE(struct nouveau_context *nv) struct nouveau_device_priv *nvdev = nouveau_device(dev); char __ret=0; - pthread_mutex_lock(&lockMutex); + pipe_mutex_lock(lockMutex); assert(!nv->locked); DRM_CAS(nvdev->lock, nvdev->ctx, @@ -88,5 +88,5 @@ UNLOCK_HARDWARE(struct nouveau_context *nv) DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); - pthread_mutex_unlock(&lockMutex); + pipe_mutex_unlock(lockMutex); } diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c index f80d00050c..658dafd910 100644 --- a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.c +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c @@ -1,11 +1,8 @@ -#include "pipe/p_context.h" -#include "util/u_memory.h" -#include "nouveau_context.h" +#include "nouveau_screen_vl.h" +#include <util/u_memory.h> #include <nouveau_drm.h> -#include "nouveau_dri.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" +#include <common/nouveau_dri.h> +#include <common/nouveau_local.h> #if NOUVEAU_DRM_HEADER_PATCHLEVEL != 11 #error nouveau_drm.h version does not match expected version @@ -50,34 +47,33 @@ int nouveau_check_dri_drm_ddx(dri_version_t *dri, dri_version_t *drm, dri_versio int nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf) { - struct nouveau_dri *nv_dri = dri_framebuf->private; - struct nouveau_screen *nv_screen; - int ret; + struct nouveau_dri *nv_dri = dri_framebuf->private; + struct nouveau_screen_vl *nv_screen; + + assert(dri_screen); + assert(dri_framebuf); if (nouveau_check_dri_drm_ddx(&dri_screen->dri, &dri_screen->drm, &dri_screen->ddx)) return 1; - nv_screen = CALLOC_STRUCT(nouveau_screen); + nv_screen = CALLOC_STRUCT(nouveau_screen_vl); + if (!nv_screen) return 1; - nv_screen->dri_screen = dri_screen; - dri_screen->private = (void*)nv_screen; + + if (nouveau_screen_init(nv_dri, dri_screen->fd, &nv_screen->base)) + { + FREE(nv_screen); + return 1; + } /* driParseOptionInfo(&nv_screen->option_cache, __driConfigOptions, __driNConfigOptions); */ - if ((ret = nouveau_device_open_existing(&nv_screen->device, 0, - dri_screen->fd, 0))) { - NOUVEAU_ERR("Failed opening nouveau device: %d.\n", ret); - return 1; - } - - nv_screen->front_offset = nv_dri->front_offset; - nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); - nv_screen->front_cpp = nv_dri->bpp / 8; - nv_screen->front_height = nv_dri->height; + nv_screen->dri_screen = dri_screen; + dri_screen->private = (void*)nv_screen; return 0; } @@ -85,7 +81,8 @@ nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf) void nouveau_screen_destroy(dri_screen_t *dri_screen) { - struct nouveau_screen *nv_screen = dri_screen->private; + struct nouveau_screen_vl *nv_screen = dri_screen->private; + nouveau_screen_cleanup(&nv_screen->base); FREE(nv_screen); } diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h index 8a58bb7556..0c1ceca6de 100644 --- a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.h +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h @@ -1,19 +1,16 @@ -#ifndef __NOUVEAU_SCREEN_H__ -#define __NOUVEAU_SCREEN_H__ +#ifndef __NOUVEAU_SCREEN_VL_H__ +#define __NOUVEAU_SCREEN_VL_H__ + +#include <driclient.h> +#include <common/nouveau_screen.h> /* TODO: Investigate using DRI options for interesting things */ /*#include "xmlconfig.h"*/ -struct nouveau_screen { +struct nouveau_screen_vl +{ + struct nouveau_screen base; dri_screen_t *dri_screen; - struct nouveau_device *device; - struct nouveau_channel_context *nvc; - - uint32_t front_offset; - uint32_t front_pitch; - uint32_t front_cpp; - uint32_t front_height; - /*driOptionCache option_cache;*/ }; @@ -21,4 +18,3 @@ int nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_frame void nouveau_screen_destroy(dri_screen_t *dri_screen); #endif - diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c index 7916c80615..16e6d5543c 100644 --- a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c @@ -1,26 +1,26 @@ -#include "pipe/p_context.h" -#include "nouveau_context.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" +#include <driclient.h> +#include <common/nouveau_local.h> +#include <common/nouveau_screen.h> +#include "nouveau_context_vl.h" #include "nouveau_swapbuffers.h" void nouveau_copy_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf, const drm_clip_rect_t *rect) { - struct nouveau_context *nv = dri_drawable->private; - drm_clip_rect_t *pbox; - int nbox, i; + struct nouveau_context_vl *nv = dri_drawable->private; + drm_clip_rect_t *pbox; + int nbox, i; - LOCK_HARDWARE(nv); + LOCK_HARDWARE(&nv->base); if (!dri_drawable->num_cliprects) { - UNLOCK_HARDWARE(nv); + UNLOCK_HARDWARE(&nv->base); return; } pbox = dri_drawable->cliprects; nbox = dri_drawable->num_cliprects; - nv->surface_copy_prep(nv, nv->frontbuffer, surf); + nv->base.surface_copy_prep(&nv->base, nv->base.frontbuffer, surf); for (i = 0; i < nbox; i++, pbox++) { int sx, sy, dx, dy, w, h; @@ -31,14 +31,11 @@ nouveau_copy_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf, w = pbox->x2 - pbox->x1; h = pbox->y2 - pbox->y1; - nv->surface_copy(nv, dx, dy, sx, sy, w, h); + nv->base.surface_copy(&nv->base, dx, dy, sx, sy, w, h); } - FIRE_RING(nv->nvc->channel); - UNLOCK_HARDWARE(nv); - - //if (nv->last_stamp != dri_drawable->last_sarea_stamp) - //nv->last_stamp = dri_drawable->last_sarea_stamp; + FIRE_RING(nv->base.nvc->channel); + UNLOCK_HARDWARE(&nv->base); } void @@ -62,3 +59,35 @@ nouveau_swap_buffers(dri_drawable_t *dri_drawable, struct pipe_surface *surf) nouveau_copy_buffer(dri_drawable, surf, NULL); } +void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private) +{ + struct nouveau_context_vl *nv; + dri_drawable_t *dri_drawable; + + assert(pws); + assert(surf); + assert(context_private); + + nv = context_private; + dri_drawable = nv->dri_drawable; + + nouveau_copy_buffer(dri_drawable, surf, NULL); +} + +void +nouveau_contended_lock(struct nouveau_context *nv) +{ + struct nouveau_context_vl *nv_vl = (struct nouveau_context_vl*)nv; + dri_drawable_t *dri_drawable = nv_vl->dri_drawable; + dri_screen_t *dri_screen = nv_vl->dri_context->dri_screen; + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dri_drawable) + DRI_VALIDATE_DRAWABLE_INFO(dri_screen, dri_drawable); +} diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c index 4f6ac9cad0..17c409e1ce 100644 --- a/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c @@ -89,6 +89,31 @@ nouveau_surface_release(struct pipe_winsys *ws, struct pipe_surface **s) } } +static uint32_t +nouveau_flags_from_usage(struct nouveau_context *nv, unsigned usage) +{ + uint32_t flags = NOUVEAU_BO_LOCAL; + + if (usage & PIPE_BUFFER_USAGE_PIXEL) { + if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) + flags |= NOUVEAU_BO_GART; + if (!(usage & PIPE_BUFFER_USAGE_CPU_READ_WRITE)) + flags |= NOUVEAU_BO_VRAM; + } + + if (usage & PIPE_BUFFER_USAGE_VERTEX) { + if (nv->cap.hw_vertex_buffer) + flags |= NOUVEAU_BO_GART; + } + + if (usage & PIPE_BUFFER_USAGE_INDEX) { + if (nv->cap.hw_index_buffer) + flags |= NOUVEAU_BO_GART; + } + + return flags; +} + static struct pipe_buffer * nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, unsigned usage, unsigned size) @@ -107,23 +132,7 @@ nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, nvbuf->base.usage = usage; nvbuf->base.size = size; - flags = NOUVEAU_BO_LOCAL; - - if (usage & PIPE_BUFFER_USAGE_PIXEL) { - if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) - flags |= NOUVEAU_BO_GART; - flags |= NOUVEAU_BO_VRAM; - } - - if (usage & PIPE_BUFFER_USAGE_VERTEX) { - if (nv->cap.hw_vertex_buffer) - flags |= NOUVEAU_BO_GART; - } - - if (usage & PIPE_BUFFER_USAGE_INDEX) { - if (nv->cap.hw_index_buffer) - flags |= NOUVEAU_BO_GART; - } + flags = nouveau_flags_from_usage(nv, usage); if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { free(nvbuf); @@ -175,6 +184,26 @@ nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf, if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) map_flags |= NOUVEAU_BO_WR; + if ((map_flags & NOUVEAU_BO_RDWR) == NOUVEAU_BO_WR && + !nouveau_bo_busy(nvbuf->bo, map_flags)) { + /* XXX: Technically incorrect. If the client maps a buffer for write-only + * and leaves part of the buffer untouched it probably expects those parts + * to remain intact. This is violated because we allocate a whole new buffer + * and don't copy the previous buffer's contents, so this optimization is + * only valid if the client intends to overwrite the whole buffer. + */ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_context *nv = nvpws->nv; + struct nouveau_device *dev = nv->nv_screen->device; + struct nouveau_bo *rename; + uint32_t flags = nouveau_flags_from_usage(nv, buf->usage); + + if (!nouveau_bo_new(dev, flags, buf->alignment, buf->size, &rename)) { + nouveau_bo_del(&nvbuf->bo); + nvbuf->bo = rename; + } + } + if (nouveau_bo_map(nvbuf->bo, map_flags)) return NULL; return nvbuf->bo->map; |