diff options
Diffstat (limited to 'src/gallium/drivers')
46 files changed, 16935 insertions, 11 deletions
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 8c290273fb..1f4e5171c0 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -66,7 +66,7 @@ void nouveau_screen_fini(struct nouveau_screen *); - +#ifndef NOUVEAU_NVC0  static INLINE unsigned  RING_3D(unsigned mthd, unsigned size)  { @@ -78,5 +78,6 @@ RING_3D_NI(unsigned mthd, unsigned size)  {  	return 0x40000000 | (7 << 13) | (size << 18) | mthd;  } +#endif  #endif diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index ab480cabd0..e7acbe6c3d 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -10,7 +10,9 @@  #include "nouveau/nouveau_grobj.h"  #include "nouveau/nouveau_notifier.h"  #include "nouveau/nouveau_resource.h" +#ifndef NOUVEAU_NVC0  #include "nouveau/nouveau_pushbuf.h" +#endif  #ifndef NV04_PFIFO_MAX_PACKET_LEN  #define NV04_PFIFO_MAX_PACKET_LEN 2047 @@ -41,4 +43,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *);  extern struct pipe_screen *  nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *); +extern struct pipe_screen * +nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *); +  #endif diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index cb7653c3fe..a5b0d0478c 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng  git clone git://0x04.net/rules-ng-ng  The rules-ng-ng source files this header was generated from are: -- nv30-40_3d.xml (  31709 bytes, from 2010-09-05 07:53:14) -- copyright.xml  (   6503 bytes, from 2010-04-10 23:15:50) -- nv_3ddefs.xml  (  15193 bytes, from 2010-09-05 07:50:15) -- nv_defs.xml    (   4437 bytes, from 2010-08-05 19:38:53) -- 
nv_object.xml  (  10424 bytes, from 2010-08-05 19:38:53) -- nvchipsets.xml (   2824 bytes, from 2010-08-05 19:38:53) +- nv_object.xml  (  11547 bytes, from 2010-10-24 15:29:34) +- copyright.xml  (   6498 bytes, from 2010-10-03 13:18:37) +- nvchipsets.xml (   2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml    (   4437 bytes, from 2010-07-06 07:43:58)  Copyright (C) 2006-2010 by the following authors:  - Artur Huillet <arthur.huillet@free.fr> (ahuillet) @@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors:  - Mark Carey <mark.carey@gmail.com> (careym)  - Matthieu Castet <matthieu.castet@parrot.com> (mat-c)  - nvidiaman <nvidiaman@users.sf.net> (nvidiaman) -- Patrice Mandin <mandin.patrice@orange.fr> (pmandin, pmdata) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)  - Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)  - Peter Popov <ironpeter@users.sf.net> (ironpeter)  - Richard Hughes <hughsient@users.sf.net> (hughsient) @@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  #define NV50_COMPUTE						0x000050c0  #define NVA3_COMPUTE						0x000085c0  #define NVC0_COMPUTE						0x000090c0 +#define NV84_CRYPT						0x000074c1  #define NV01_SUBCHAN__SIZE					0x00002000  #define NV01_SUBCHAN						0x00000000 @@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  #define NV84_SUBCHAN_QUERY_GET					0x0000001c -#define NV84_SUBCHAN_UNK20					0x00000020 +#define NV84_SUBCHAN_QUERY_INTR					0x00000020 -#define NV84_SUBCHAN_UNK24					0x00000024 +#define NV84_SUBCHAN_WRCACHE_FLUSH				0x00000024  #define NV10_SUBCHAN_REF_CNT					0x00000050 @@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  #define NV11_SUBCHAN_SEMAPHORE_RELEASE				0x0000006c -#define NV50_SUBCHAN_UNK80					0x00000080 +#define NV40_SUBCHAN_YIELD					0x00000080  #define NV01_GRAPH						0x00000000 @@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  
#define NV40_GRAPH_PM_TRIGGER					0x00000140 +#define NVC0_SUBCHAN__SIZE					0x00008000 +#define NVC0_SUBCHAN						0x00000000 + +#define NVC0_SUBCHAN_OBJECT					0x00000000 + + +#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH				0x00000010 + +#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW				0x00000014 + +#define NVC0_SUBCHAN_QUERY_SEQUENCE				0x00000018 + +#define NVC0_SUBCHAN_QUERY_GET					0x0000001c + +#define NVC0_SUBCHAN_REF_CNT					0x00000050 + +#define NVC0_GRAPH						0x00000000 + +#define NVC0_GRAPH_NOP						0x00000100 + +#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH				0x00000104 + +#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW				0x00000108 + +#define NVC0_GRAPH_NOTIFY					0x0000010c +#define NVC0_GRAPH_NOTIFY_WRITE					0x00000000 +#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN			0x00000001 + +#define NVC0_GRAPH_SERIALIZE					0x00000110 + +#define NVC0_GRAPH_MACRO_UPLOAD_POS				0x00000114 + +#define NVC0_GRAPH_MACRO_UPLOAD_DATA				0x00000118 + +#define NVC0_GRAPH_MACRO_ID					0x0000011c + +#define NVC0_GRAPH_MACRO_POS					0x00000120 +  #endif /* NV_OBJECT_XML */ diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile new file mode 100644 index 0000000000..5c3d46d9ea --- /dev/null +++ b/src/gallium/drivers/nvc0/Makefile @@ -0,0 +1,33 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nvc0 + +C_SOURCES = \ +	nvc0_buffer.c \ +	nvc0_context.c \ +	nvc0_draw.c \ +	nvc0_formats.c \ +	nvc0_miptree.c \ +	nvc0_resource.c \ +	nvc0_screen.c \ +	nvc0_state.c \ +	nvc0_state_validate.c \ +	nvc0_surface.c \ +	nvc0_tex.c \ +	nvc0_transfer.c \ +	nvc0_vbo.c \ +	nvc0_program.c \ +	nvc0_shader_state.c \ +	nvc0_pc.c \ +	nvc0_pc_print.c \ +	nvc0_pc_emit.c \ +	nvc0_tgsi_to_nc.c \ +	nvc0_pc_optimize.c \ +	nvc0_pc_regalloc.c \ +	nvc0_push.c \ +	nvc0_push2.c \ +	nvc0_fence.c \ +	nvc0_mm.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript new file mode 100644 index 0000000000..ec529d44f5 --- /dev/null +++ b/src/gallium/drivers/nvc0/SConscript @@ -0,0 +1,35 @@ +Import('*') + +env = env.Clone() + +nvc0 = env.ConvenienceLibrary( +    target = 'nvc0', +    source = [ +        'nvc0_buffer.c', +        'nvc0_context.c', +        'nvc0_draw.c', +        'nvc0_formats.c', +        'nvc0_miptree.c', +        'nvc0_resource.c', +        'nvc0_screen.c', +        'nvc0_state.c', +        'nvc0_state_validate.c', +        'nvc0_surface.c', +        'nvc0_tex.c', +        'nvc0_transfer.c', +        'nvc0_vbo.c', +        'nvc0_program.c', +        'nvc0_shader_state.c', +        'nvc0_pc.c', +        'nvc0_pc_print.c', +        'nvc0_pc_emit.c', +        'nvc0_tgsi_to_nc.c', +        'nvc0_pc_optimize.c', +        'nvc0_pc_regalloc.c', +        'nvc0_push.c', +        'nvc0_push2.c', +        'nvc0_fence.c', +        'nvc0_mm.c' +    ]) + +Export('nvc0') diff --git a/src/gallium/drivers/nvc0/nv50_defs.xml.h b/src/gallium/drivers/nvc0/nv50_defs.xml.h new file mode 100644 index 0000000000..1bf2f802b5 --- /dev/null +++ b/src/gallium/drivers/nvc0/nv50_defs.xml.h @@ -0,0 +1,142 @@ +#ifndef NV50_DEFS_XML +#define NV50_DEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_defs.xml (   4482 bytes, from 2010-10-03 13:18:37) +- copyright.xml (   6498 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor 
Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT			0x000000c0 +#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT			0x000000c1 +#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT			0x000000c2 +#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT			0x000000c3 +#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM			0x000000c6 +#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM			0x000000c7 +#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT			0x000000c8 +#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT			0x000000c9 +#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT			0x000000ca +#define NV50_SURFACE_FORMAT_R32G32_FLOAT			0x000000cb +#define NV50_SURFACE_FORMAT_R32G32_SINT				0x000000cc +#define NV50_SURFACE_FORMAT_R32G32_UINT				0x000000cd +#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT			0x000000ce +#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM			0x000000cf +#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB			0x000000d0 +#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM			0x000000d1 +#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT			0x000000d2 +#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM			0x000000d5 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB			0x000000d6 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM			0x000000d7 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT			0x000000d8 +#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT			0x000000d9 +#define NV50_SURFACE_FORMAT_R16G16_UNORM			0x000000da +#define NV50_SURFACE_FORMAT_R16G16_SNORM			0x000000db +#define NV50_SURFACE_FORMAT_R16G16_SINT				0x000000dc +#define NV50_SURFACE_FORMAT_R16G16_UINT				0x000000dd +#define NV50_SURFACE_FORMAT_R16G16_FLOAT			0x000000de +#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM			0x000000df +#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT			0x000000e0 +#define NV50_SURFACE_FORMAT_R32_FLOAT				0x000000e5 +#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM			0x000000e6 +#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB			0x000000e7 +#define NV50_SURFACE_FORMAT_R5G6B5_UNORM			0x000000e8 +#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM			0x000000e9 +#define 
NV50_SURFACE_FORMAT_R8G8_UNORM				0x000000ea +#define NV50_SURFACE_FORMAT_R8G8_SNORM				0x000000eb +#define NV50_SURFACE_FORMAT_R8G8_SINT				0x000000ec +#define NV50_SURFACE_FORMAT_R8G8_UINT				0x000000ed +#define NV50_SURFACE_FORMAT_R16_UNORM				0x000000ee +#define NV50_SURFACE_FORMAT_R16_SNORM				0x000000ef +#define NV50_SURFACE_FORMAT_R16_SINT				0x000000f0 +#define NV50_SURFACE_FORMAT_R16_UINT				0x000000f1 +#define NV50_SURFACE_FORMAT_R16_FLOAT				0x000000f2 +#define NV50_SURFACE_FORMAT_R8_UNORM				0x000000f3 +#define NV50_SURFACE_FORMAT_R8_SNORM				0x000000f4 +#define NV50_SURFACE_FORMAT_R8_SINT				0x000000f5 +#define NV50_SURFACE_FORMAT_R8_UINT				0x000000f6 +#define NV50_SURFACE_FORMAT_A8_UNORM				0x000000f7 +#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM			0x000000f8 +#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM			0x000000f9 +#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB			0x000000fa +#define NV50_ZETA_FORMAT_Z32_FLOAT				0x0000000a +#define NV50_ZETA_FORMAT_Z16_UNORM				0x00000013 +#define NV50_ZETA_FORMAT_Z24S8_UNORM				0x00000014 +#define NV50_ZETA_FORMAT_X8Z24_UNORM				0x00000015 +#define NV50_ZETA_FORMAT_S8Z24_UNORM				0x00000016 +#define NV50_ZETA_FORMAT_UNK18					0x00000018 +#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM			0x00000019 +#define NV50_ZETA_FORMAT_UNK1D					0x0000001d +#define NV50_ZETA_FORMAT_UNK1E					0x0000001e +#define NV50_ZETA_FORMAT_UNK1F					0x0000001f +#define NV50_QUERY__SIZE					0x00000010 +#define NV50_QUERY_COUNTER					0x00000000 + +#define NV50_QUERY_RES						0x00000004 + +#define NV50_QUERY_TIME						0x00000008 + + +#endif /* NV50_DEFS_XML */ diff --git a/src/gallium/drivers/nvc0/nv50_texture.xml.h b/src/gallium/drivers/nvc0/nv50_texture.xml.h new file mode 100644 index 0000000000..9f83206516 --- /dev/null +++ b/src/gallium/drivers/nvc0/nv50_texture.xml.h @@ -0,0 +1,259 @@ +#ifndef NV50_TEXTURE_XML +#define NV50_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_texture.xml (   6871 bytes, from 2010-10-03 13:18:37) +- copyright.xml    (   6498 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- 
Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_TIC_MAP_ZERO					0x00000000 +#define NV50_TIC_MAP_C0						0x00000002 +#define NV50_TIC_MAP_C1						0x00000003 +#define NV50_TIC_MAP_C2						0x00000004 +#define NV50_TIC_MAP_C3						0x00000005 +#define NV50_TIC_MAP_ONE					0x00000007 +#define NV50_TIC_TYPE_SNORM					0x00000001 +#define NV50_TIC_TYPE_UNORM					0x00000002 +#define NV50_TIC_TYPE_SINT					0x00000003 +#define NV50_TIC_TYPE_UINT					0x00000004 +#define NV50_TIC_TYPE_SSCALED					0x00000005 +#define NV50_TIC_TYPE_USCALED					0x00000006 +#define NV50_TIC_TYPE_FLOAT					0x00000007 +#define NV50_TSC_WRAP_REPEAT					0x00000000 +#define NV50_TSC_WRAP_MIRROR_REPEAT				0x00000001 +#define NV50_TSC_WRAP_CLAMP_TO_EDGE				0x00000002 +#define NV50_TSC_WRAP_CLAMP_TO_BORDER				0x00000003 +#define NV50_TSC_WRAP_CLAMP					0x00000004 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE			0x00000005 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER			0x00000006 +#define NV50_TSC_WRAP_MIRROR_CLAMP				0x00000007 +#define NV50_TIC__SIZE						0x00000020 +#define NV50_TIC_0						0x00000000 +#define NV50_TIC_0_MAPA__MASK					0x38000000 +#define NV50_TIC_0_MAPA__SHIFT					27 +#define NV50_TIC_0_MAPB__MASK					0x07000000 +#define NV50_TIC_0_MAPB__SHIFT					24 +#define NV50_TIC_0_MAPG__MASK					0x00e00000 +#define NV50_TIC_0_MAPG__SHIFT					21 +#define NV50_TIC_0_MAPR__MASK					0x001c0000 +#define NV50_TIC_0_MAPR__SHIFT					18 +#define NV50_TIC_0_TYPE3__MASK					0x00038000 +#define NV50_TIC_0_TYPE3__SHIFT					15 +#define NV50_TIC_0_TYPE2__MASK					0x00007000 +#define NV50_TIC_0_TYPE2__SHIFT					12 +#define NV50_TIC_0_TYPE1__MASK					0x00000e00 +#define NV50_TIC_0_TYPE1__SHIFT					9 +#define NV50_TIC_0_TYPE0__MASK					0x000001c0 +#define NV50_TIC_0_TYPE0__SHIFT					6 +#define NV50_TIC_0_SWIZZLE__MASK				0x3ffc0000 +#define NV50_TIC_0_FMT__MASK					0x0000003f +#define NV50_TIC_0_FMT__SHIFT					0 +#define NV50_TIC_0_FMT_32_32_32_32				0x00000001 +#define NV50_TIC_0_FMT_16_16_16_16				0x00000003 +#define NV50_TIC_0_FMT_32_32				
	0x00000004 +#define NV50_TIC_0_FMT_32_8					0x00000005 +#define NV50_TIC_0_FMT_8_8_8_8					0x00000008 +#define NV50_TIC_0_FMT_2_10_10_10				0x00000009 +#define NV50_TIC_0_FMT_16_16					0x0000000c +#define NV50_TIC_0_FMT_8_24					0x0000000d +#define NV50_TIC_0_FMT_24_8					0x0000000e +#define NV50_TIC_0_FMT_32					0x0000000f +#define NV50_TIC_0_FMT_4_4_4_4					0x00000012 +#define NV50_TIC_0_FMT_5_5_5_1					0x00000013 +#define NV50_TIC_0_FMT_1_5_5_5					0x00000014 +#define NV50_TIC_0_FMT_5_6_5					0x00000015 +#define NV50_TIC_0_FMT_6_5_5					0x00000016 +#define NV50_TIC_0_FMT_8_8					0x00000018 +#define NV50_TIC_0_FMT_16					0x0000001b +#define NV50_TIC_0_FMT_8					0x0000001d +#define NV50_TIC_0_FMT_4_4					0x0000001e +#define NV50_TIC_0_FMT_UNK1F					0x0000001f +#define NV50_TIC_0_FMT_E5_9_9_9					0x00000020 +#define NV50_TIC_0_FMT_10_11_11					0x00000021 +#define NV50_TIC_0_FMT_C1_C2_C1_C0				0x00000022 +#define NV50_TIC_0_FMT_C2_C1_C0_C1				0x00000023 +#define NV50_TIC_0_FMT_DXT1					0x00000024 +#define NV50_TIC_0_FMT_DXT3					0x00000025 +#define NV50_TIC_0_FMT_DXT5					0x00000026 +#define NV50_TIC_0_FMT_RGTC1					0x00000027 +#define NV50_TIC_0_FMT_RGTC2					0x00000028 +#define NV50_TIC_0_FMT_24_8_ZETA				0x00000029 +#define NV50_TIC_0_FMT_8_24_ZETA				0x0000002a +#define NV50_TIC_0_FMT_UNK2C_ZETA				0x0000002c +#define NV50_TIC_0_FMT_UNK2D_ZETA				0x0000002d +#define NV50_TIC_0_FMT_UNK2E_ZETA				0x0000002e +#define NV50_TIC_0_FMT_32_ZETA					0x0000002f +#define NV50_TIC_0_FMT_32_8_ZETA				0x00000030 +#define NV50_TIC_0_FMT_16_ZETA				0x0000003a + +#define NV50_TIC_1						0x00000004 +#define NV50_TIC_1_OFFSET_LOW__MASK				0xffffffff +#define NV50_TIC_1_OFFSET_LOW__SHIFT				0 + +#define NV50_TIC_2						0x00000008 +#define NV50_TIC_2_OFFSET_HIGH__MASK				0x000000ff +#define NV50_TIC_2_OFFSET_HIGH__SHIFT				0 +#define NV50_TIC_2_COLORSPACE_SRGB				0x00000400 +#define NV50_TIC_2_TARGET__MASK					0x0003c000 +#define NV50_TIC_2_TARGET__SHIFT				14 +#define 
NV50_TIC_2_TARGET_1D					0x00000000 +#define NV50_TIC_2_TARGET_2D					0x00004000 +#define NV50_TIC_2_TARGET_3D					0x00008000 +#define NV50_TIC_2_TARGET_CUBE					0x0000c000 +#define NV50_TIC_2_TARGET_1D_ARRAY				0x00010000 +#define NV50_TIC_2_TARGET_2D_ARRAY				0x00014000 +#define NV50_TIC_2_TARGET_BUFFER				0x00018000 +#define NV50_TIC_2_TARGET_RECT					0x0001c000 +#define NV50_TIC_2_TARGET_CUBE_ARRAY				0x00020000 +#define NV50_TIC_2_TILE_MODE_LINEAR				0x00040000 +#define NV50_TIC_2_TILE_MODE_Y__MASK				0x01c00000 +#define NV50_TIC_2_TILE_MODE_Y__SHIFT				22 +#define NV50_TIC_2_TILE_MODE_Z__MASK				0x0e000000 +#define NV50_TIC_2_TILE_MODE_Z__SHIFT				25 +#define NV50_TIC_2_2D_UNK0258__MASK				0x30000000 +#define NV50_TIC_2_2D_UNK0258__SHIFT				28 +#define NV50_TIC_2_NORMALIZED_COORDS				0x80000000 + +#define NV50_TIC_3						0x0000000c +#define NV50_TIC_3_PITCH__MASK					0xffffffff +#define NV50_TIC_3_PITCH__SHIFT					0 + +#define NV50_TIC_4						0x00000010 +#define NV50_TIC_4_WIDTH__MASK					0xffffffff +#define NV50_TIC_4_WIDTH__SHIFT					0 + +#define NV50_TIC_5						0x00000014 +#define NV50_TIC_5_LAST_LEVEL__MASK				0xf0000000 +#define NV50_TIC_5_LAST_LEVEL__SHIFT				28 +#define NV50_TIC_5_DEPTH__MASK					0x0fff0000 +#define NV50_TIC_5_DEPTH__SHIFT					16 +#define NV50_TIC_5_HEIGHT__MASK					0x0000ffff +#define NV50_TIC_5_HEIGHT__SHIFT				0 + +#define NV50_TIC_7						0x0000001c +#define NV50_TIC_7_BASE_LEVEL__MASK				0x0000000f +#define NV50_TIC_7_BASE_LEVEL__SHIFT				0 +#define NV50_TIC_7_MAX_LEVEL__MASK				0x000000f0 +#define NV50_TIC_7_MAX_LEVEL__SHIFT				4 + +#define NV50_TSC__SIZE						0x00000020 +#define NV50_TSC_0						0x00000000 +#define NV50_TSC_0_WRAPS__MASK					0x00000007 +#define NV50_TSC_0_WRAPS__SHIFT					0 +#define NV50_TSC_0_WRAPT__MASK					0x00000038 +#define NV50_TSC_0_WRAPT__SHIFT					3 +#define NV50_TSC_0_WRAPR__MASK					0x000001c0 +#define NV50_TSC_0_WRAPR__SHIFT					6 +#define NV50_TSC_0_SHADOW_COMPARE_ENABLE			0x00000200 +#define 
NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK			0x00001c00 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT			10 +#define NV50_TSC_0_ANISOTROPY_MASK__MASK			0x00700000 +#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT			20 + +#define NV50_TSC_1						0x00000004 +#define NV50_TSC_1_UNKN_ANISO_15				0x10000000 +#define NV50_TSC_1_UNKN_ANISO_35				0x18000000 +#define NV50_TSC_1_MAGF__MASK					0x00000003 +#define NV50_TSC_1_MAGF__SHIFT					0 +#define NV50_TSC_1_MAGF_NEAREST					0x00000001 +#define NV50_TSC_1_MAGF_LINEAR					0x00000002 +#define NV50_TSC_1_MINF__MASK					0x00000030 +#define NV50_TSC_1_MINF__SHIFT					4 +#define NV50_TSC_1_MINF_NEAREST					0x00000010 +#define NV50_TSC_1_MINF_LINEAR					0x00000020 +#define NV50_TSC_1_MIPF__MASK					0x000000c0 +#define NV50_TSC_1_MIPF__SHIFT					6 +#define NV50_TSC_1_MIPF_NONE					0x00000040 +#define NV50_TSC_1_MIPF_NEAREST					0x00000080 +#define NV50_TSC_1_MIPF_LINEAR					0x000000c0 +#define NV50_TSC_1_LOD_BIAS__MASK				0x01fff000 +#define NV50_TSC_1_LOD_BIAS__SHIFT				12 + +#define NV50_TSC_2						0x00000008 +#define NV50_TSC_2_MIN_LOD__MASK				0x00000f00 +#define NV50_TSC_2_MIN_LOD__SHIFT				8 +#define NV50_TSC_2_MAX_LOD__MASK				0x00f00000 +#define NV50_TSC_2_MAX_LOD__SHIFT				20 + +#define NV50_TSC_4						0x00000010 +#define NV50_TSC_4_BORDER_COLOR_RED__MASK			0xffffffff +#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT			0 + +#define NV50_TSC_5						0x00000014 +#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK			0xffffffff +#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT			0 + +#define NV50_TSC_6						0x00000018 +#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK			0xffffffff +#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT			0 + +#define NV50_TSC_7						0x0000001c +#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK			0xffffffff +#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT			0 + + +#endif /* NV50_TEXTURE_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h new file mode 100644 index 0000000000..aebcd510e8 --- /dev/null 
+++ b/src/gallium/drivers/nvc0/nvc0_2d.xml.h @@ -0,0 +1,380 @@ +#ifndef NVC0_2D_XML +#define NVC0_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_2d.xml    (   9454 bytes, from 2010-10-16 16:03:11) +- copyright.xml  (   6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml  (  11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml (   2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml    (   4437 bytes, from 2010-07-06 07:43:58) +- nv50_defs.xml  (   4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov 
<ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVC0_2D_DST_FORMAT					0x00000200 + +#define NVC0_2D_DST_LINEAR					0x00000204 + +#define NVC0_2D_DST_TILE_MODE					0x00000208 + +#define NVC0_2D_DST_DEPTH					0x0000020c + +#define NVC0_2D_DST_LAYER					0x00000210 + +#define NVC0_2D_DST_PITCH					0x00000214 + +#define NVC0_2D_DST_WIDTH					0x00000218 + +#define NVC0_2D_DST_HEIGHT					0x0000021c + +#define NVC0_2D_DST_ADDRESS_HIGH				0x00000220 + +#define NVC0_2D_DST_ADDRESS_LOW					0x00000224 + +#define NVC0_2D_UNK228						0x00000228 + +#define NVC0_2D_SRC_FORMAT					0x00000230 + +#define NVC0_2D_SRC_LINEAR					0x00000234 + +#define NVC0_2D_SRC_TILE_MODE					0x00000238 + +#define NVC0_2D_SRC_DEPTH					0x0000023c + +#define NVC0_2D_SRC_LAYER					0x00000240 + +#define NVC0_2D_SRC_PITCH					0x00000244 +#define NVC0_2D_SRC_PITCH__MAX					0x00040000 + +#define NVC0_2D_SRC_WIDTH					0x00000248 +#define NVC0_2D_SRC_WIDTH__MAX					0x00010000 + +#define NVC0_2D_SRC_HEIGHT					0x0000024c +#define NVC0_2D_SRC_HEIGHT__MAX					0x00010000 + +#define NVC0_2D_SRC_ADDRESS_HIGH				0x00000250 + +#define NVC0_2D_SRC_ADDRESS_LOW					0x00000254 + +#define NVC0_2D_UNK258						0x00000258 + +#define NVC0_2D_UNK260						0x00000260 + +#define NVC0_2D_COND_ADDRESS_HIGH				0x00000264 + +#define NVC0_2D_COND_ADDRESS_LOW				0x00000268 + +#define NVC0_2D_COND_MODE					0x0000026c +#define NVC0_2D_COND_MODE_NEVER					0x00000000 +#define NVC0_2D_COND_MODE_ALWAYS				0x00000001 +#define NVC0_2D_COND_MODE_RES_NON_ZERO				0x00000002 +#define NVC0_2D_COND_MODE_EQUAL					0x00000003 +#define NVC0_2D_COND_MODE_NOT_EQUAL				0x00000004 + +#define NVC0_2D_CLIP_X						0x00000280 + +#define NVC0_2D_CLIP_Y						0x00000284 + +#define NVC0_2D_CLIP_W						0x00000288 + +#define NVC0_2D_CLIP_H						0x0000028c + +#define NVC0_2D_CLIP_ENABLE					0x00000290 + +#define NVC0_2D_COLOR_KEY_FORMAT				0x00000294 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP				0x00000000 +#define NVC0_2D_COLOR_KEY_FORMAT_15BPP				0x00000001 +#define NVC0_2D_COLOR_KEY_FORMAT_24BPP	
			0x00000002 +#define NVC0_2D_COLOR_KEY_FORMAT_30BPP				0x00000003 +#define NVC0_2D_COLOR_KEY_FORMAT_8BPP				0x00000004 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2				0x00000005 +#define NVC0_2D_COLOR_KEY_FORMAT_32BPP				0x00000006 + +#define NVC0_2D_COLOR_KEY					0x00000298 + +#define NVC0_2D_COLOR_KEY_ENABLE				0x0000029c + +#define NVC0_2D_ROP						0x000002a0 + +#define NVC0_2D_BETA1						0x000002a4 + +#define NVC0_2D_BETA4						0x000002a8 + +#define NVC0_2D_OPERATION					0x000002ac +#define NVC0_2D_OPERATION_SRCCOPY_AND				0x00000000 +#define NVC0_2D_OPERATION_ROP_AND				0x00000001 +#define NVC0_2D_OPERATION_BLEND_AND				0x00000002 +#define NVC0_2D_OPERATION_SRCCOPY				0x00000003 +#define NVC0_2D_OPERATION_UNK4					0x00000004 +#define NVC0_2D_OPERATION_SRCCOPY_PREMULT			0x00000005 +#define NVC0_2D_OPERATION_BLEND_PREMULT				0x00000006 + +#define NVC0_2D_UNK2B0						0x000002b0 +#define NVC0_2D_UNK2B0_UNK0__MASK				0x0000003f +#define NVC0_2D_UNK2B0_UNK0__SHIFT				0 +#define NVC0_2D_UNK2B0_UNK1__MASK				0x00003f00 +#define NVC0_2D_UNK2B0_UNK1__SHIFT				8 + +#define NVC0_2D_PATTERN_SELECT					0x000002b4 +#define NVC0_2D_PATTERN_SELECT_MONO_8X8				0x00000000 +#define NVC0_2D_PATTERN_SELECT_MONO_64X1			0x00000001 +#define NVC0_2D_PATTERN_SELECT_MONO_1X64			0x00000002 +#define NVC0_2D_PATTERN_SELECT_COLOR				0x00000003 + +#define NVC0_2D_PATTERN_COLOR_FORMAT				0x000002e8 +#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP			0x00000000 +#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP			0x00000001 +#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP			0x00000002 +#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP			0x00000003 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4			0x00000004 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5			0x00000005 + +#define NVC0_2D_PATTERN_MONO_FORMAT				0x000002ec +#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6			0x00000000 +#define NVC0_2D_PATTERN_MONO_FORMAT_LE				0x00000001 + +#define NVC0_2D_PATTERN_COLOR(i0)			       (0x000002f0 + 0x4*(i0)) +#define NVC0_2D_PATTERN_COLOR__ESIZE	
			0x00000004 +#define NVC0_2D_PATTERN_COLOR__LEN				0x00000002 + +#define NVC0_2D_PATTERN_BITMAP(i0)			       (0x000002f8 + 0x4*(i0)) +#define NVC0_2D_PATTERN_BITMAP__ESIZE				0x00000004 +#define NVC0_2D_PATTERN_BITMAP__LEN				0x00000002 + +#define NVC0_2D_PATTERN_X8R8G8B8(i0)			       (0x00000300 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE				0x00000004 +#define NVC0_2D_PATTERN_X8R8G8B8__LEN				0x00000040 +#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK			0x000000ff +#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT			0 +#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK			0x0000ff00 +#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT			8 +#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK			0x00ff0000 +#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT			16 + +#define NVC0_2D_PATTERN_R5G6B5(i0)			       (0x00000400 + 0x4*(i0)) +#define NVC0_2D_PATTERN_R5G6B5__ESIZE				0x00000004 +#define NVC0_2D_PATTERN_R5G6B5__LEN				0x00000020 +#define NVC0_2D_PATTERN_R5G6B5_B0__MASK				0x0000001f +#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT			0 +#define NVC0_2D_PATTERN_R5G6B5_G0__MASK				0x000007e0 +#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT			5 +#define NVC0_2D_PATTERN_R5G6B5_R0__MASK				0x0000f800 +#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT			11 +#define NVC0_2D_PATTERN_R5G6B5_B1__MASK				0x001f0000 +#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT			16 +#define NVC0_2D_PATTERN_R5G6B5_G1__MASK				0x07e00000 +#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT			21 +#define NVC0_2D_PATTERN_R5G6B5_R1__MASK				0xf8000000 +#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT			27 + +#define NVC0_2D_PATTERN_X1R5G5B5(i0)			       (0x00000480 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE				0x00000004 +#define NVC0_2D_PATTERN_X1R5G5B5__LEN				0x00000020 +#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK			0x0000001f +#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT			0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK			0x000003e0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT			5 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK			0x00007c00 +#define 
NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT			10 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK			0x001f0000 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT			16 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK			0x03e00000 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT			21 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK			0x7c000000 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT			26 + +#define NVC0_2D_PATTERN_Y8(i0)				       (0x00000500 + 0x4*(i0)) +#define NVC0_2D_PATTERN_Y8__ESIZE				0x00000004 +#define NVC0_2D_PATTERN_Y8__LEN					0x00000010 +#define NVC0_2D_PATTERN_Y8_Y0__MASK				0x000000ff +#define NVC0_2D_PATTERN_Y8_Y0__SHIFT				0 +#define NVC0_2D_PATTERN_Y8_Y1__MASK				0x0000ff00 +#define NVC0_2D_PATTERN_Y8_Y1__SHIFT				8 +#define NVC0_2D_PATTERN_Y8_Y2__MASK				0x00ff0000 +#define NVC0_2D_PATTERN_Y8_Y2__SHIFT				16 +#define NVC0_2D_PATTERN_Y8_Y3__MASK				0xff000000 +#define NVC0_2D_PATTERN_Y8_Y3__SHIFT				24 + +#define NVC0_2D_DRAW_SHAPE					0x00000580 +#define NVC0_2D_DRAW_SHAPE_POINTS				0x00000000 +#define NVC0_2D_DRAW_SHAPE_LINES				0x00000001 +#define NVC0_2D_DRAW_SHAPE_LINE_STRIP				0x00000002 +#define NVC0_2D_DRAW_SHAPE_TRIANGLES				0x00000003 +#define NVC0_2D_DRAW_SHAPE_RECTANGLES				0x00000004 + +#define NVC0_2D_DRAW_COLOR_FORMAT				0x00000584 + +#define NVC0_2D_DRAW_COLOR					0x00000588 + +#define NVC0_2D_UNK58C						0x0000058c +#define NVC0_2D_UNK58C_0					0x00000001 +#define NVC0_2D_UNK58C_1					0x00000010 +#define NVC0_2D_UNK58C_2					0x00000100 +#define NVC0_2D_UNK58C_3					0x00001000 + +#define NVC0_2D_DRAW_POINT16					0x000005e0 +#define NVC0_2D_DRAW_POINT16_X__MASK				0x0000ffff +#define NVC0_2D_DRAW_POINT16_X__SHIFT				0 +#define NVC0_2D_DRAW_POINT16_Y__MASK				0xffff0000 +#define NVC0_2D_DRAW_POINT16_Y__SHIFT				16 + +#define NVC0_2D_DRAW_POINT32_X(i0)			       (0x00000600 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_X__ESIZE				0x00000008 +#define NVC0_2D_DRAW_POINT32_X__LEN				0x00000040 + +#define NVC0_2D_DRAW_POINT32_Y(i0)			       (0x00000604 + 0x8*(i0)) +#define 
NVC0_2D_DRAW_POINT32_Y__ESIZE				0x00000008 +#define NVC0_2D_DRAW_POINT32_Y__LEN				0x00000040 + +#define NVC0_2D_SIFC_BITMAP_ENABLE				0x00000800 + +#define NVC0_2D_SIFC_FORMAT					0x00000804 + +#define NVC0_2D_SIFC_BITMAP_FORMAT				0x00000808 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I1				0x00000000 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I4				0x00000001 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I8				0x00000002 + +#define NVC0_2D_SIFC_BITMAP_LSB_FIRST				0x0000080c + +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE			0x00000810 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED		0x00000000 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE		0x00000001 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD		0x00000002 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0				0x00000814 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1				0x00000818 + +#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE			0x0000081c + +#define NVC0_2D_SIFC_WIDTH					0x00000838 + +#define NVC0_2D_SIFC_HEIGHT					0x0000083c + +#define NVC0_2D_SIFC_DX_DU_FRACT				0x00000840 + +#define NVC0_2D_SIFC_DX_DU_INT					0x00000844 + +#define NVC0_2D_SIFC_DY_DV_FRACT				0x00000848 + +#define NVC0_2D_SIFC_DY_DV_INT					0x0000084c + +#define NVC0_2D_SIFC_DST_X_FRACT				0x00000850 + +#define NVC0_2D_SIFC_DST_X_INT					0x00000854 + +#define NVC0_2D_SIFC_DST_Y_FRACT				0x00000858 + +#define NVC0_2D_SIFC_DST_Y_INT					0x0000085c + +#define NVC0_2D_SIFC_DATA					0x00000860 + +#define NVC0_2D_UNK0870						0x00000870 + +#define NVC0_2D_UNK0880						0x00000880 + +#define NVC0_2D_UNK0884						0x00000884 + +#define NVC0_2D_UNK0888						0x00000888 + +#define NVC0_2D_BLIT_CONTROL					0x0000088c +#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK			0x00000001 +#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT			0 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER			0x00000000 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER			0x00000001 +#define NVC0_2D_BLIT_CONTROL_FILTER__MASK			0x00000010 +#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT			4 +#define 
NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE		0x00000000 +#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR			0x00000010 + +#define NVC0_2D_BLIT_DST_X					0x000008b0 + +#define NVC0_2D_BLIT_DST_Y					0x000008b4 + +#define NVC0_2D_BLIT_DST_W					0x000008b8 + +#define NVC0_2D_BLIT_DST_H					0x000008bc + +#define NVC0_2D_BLIT_DU_DX_FRACT				0x000008c0 + +#define NVC0_2D_BLIT_DU_DX_INT					0x000008c4 + +#define NVC0_2D_BLIT_DV_DY_FRACT				0x000008c8 + +#define NVC0_2D_BLIT_DV_DY_INT					0x000008cc + +#define NVC0_2D_BLIT_SRC_X_FRACT				0x000008d0 + +#define NVC0_2D_BLIT_SRC_X_INT					0x000008d4 + +#define NVC0_2D_BLIT_SRC_Y_FRACT				0x000008d8 + +#define NVC0_2D_BLIT_SRC_Y_INT					0x000008dc + + +#endif /* NVC0_2D_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h new file mode 100644 index 0000000000..1346d99940 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -0,0 +1,1101 @@ +#ifndef NVC0_3D_XML +#define NVC0_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml    (  28058 bytes, from 2010-11-26 18:05:20) +- copyright.xml  (   6452 bytes, from 2010-11-25 23:28:20) +- nv_defs.xml    (   4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml  (  16394 bytes, from 2010-10-09 08:27:14) +- nv_object.xml  (  11547 bytes, from 2010-11-26 16:41:56) +- nvchipsets.xml (   3074 bytes, from 2010-11-07 00:36:28) +- nv50_defs.xml  (   4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the 
Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_3D_NOTIFY_ADDRESS_HIGH				0x00000104 +#define NVC0_3D_NOTIFY_ADDRESS_LOW				0x00000108 +#define NVC0_3D_NOTIFY						0x0000010c + +#define NVC0_3D_SERIALIZE					0x00000110 + +#define NVC0_3D_EARLY_FRAGMENT_TESTS				0x00000210 + +#define NVC0_3D_TESS_MODE					0x00000320 +#define NVC0_3D_TESS_MODE_PRIM__MASK				0x0000000f +#define NVC0_3D_TESS_MODE_PRIM__SHIFT				0 +#define NVC0_3D_TESS_MODE_PRIM_ISOLINES				0x00000000 +#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES			0x00000001 +#define NVC0_3D_TESS_MODE_PRIM_QUADS				0x00000002 +#define NVC0_3D_TESS_MODE_SPACING__MASK				0x000000f0 +#define NVC0_3D_TESS_MODE_SPACING__SHIFT			4 +#define NVC0_3D_TESS_MODE_SPACING_EQUAL				0x00000000 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD		0x00000010 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN		0x00000020 +#define NVC0_3D_TESS_MODE_CW					0x00000100 +#define NVC0_3D_TESS_MODE_CONNECTED				0x00000200 + +#define NVC0_3D_TESS_LEVEL_OUTER(i0)			       (0x00000324 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE				0x00000004 +#define NVC0_3D_TESS_LEVEL_OUTER__LEN				0x00000004 + +#define NVC0_3D_TESS_LEVEL_INNER(i0)			       (0x00000334 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_INNER__ESIZE				0x00000004 +#define NVC0_3D_TESS_LEVEL_INNER__LEN				
0x00000002 + +#define NVC0_3D_RASTERIZE_ENABLE				0x0000037c + +#define NVC0_3D_TFB(i0)					       (0x00000380 + 0x20*(i0)) +#define NVC0_3D_TFB__ESIZE					0x00000020 +#define NVC0_3D_TFB__LEN					0x00000004 + +#define NVC0_3D_TFB_BUFFER_ENABLE(i0)			       (0x00000380 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_HIGH(i0)			       (0x00000384 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_LOW(i0)			       (0x00000388 + 0x20*(i0)) + +#define NVC0_3D_TFB_BUFFER_SIZE(i0)			       (0x0000038c + 0x20*(i0)) + +#define NVC0_3D_TFB_PRIMITIVE_ID(i0)			       (0x00000390 + 0x20*(i0)) + +#define NVC0_3D_TFB_UNK0700(i0)				       (0x00000700 + 0x10*(i0)) + +#define NVC0_3D_TFB_VARYING_COUNT(i0)			       (0x00000704 + 0x10*(i0)) + +#define NVC0_3D_TFB_BUFFER_STRIDE(i0)			       (0x00000708 + 0x10*(i0)) + +#define NVC0_3D_TFB_ENABLE					0x00000744 + +#define NVC0_3D_LOCAL_BASE					0x0000077c + +#define NVC0_3D_LOCAL_ADDRESS_HIGH				0x00000790 + +#define NVC0_3D_LOCAL_ADDRESS_LOW				0x00000794 + +#define NVC0_3D_LOCAL_SIZE_HIGH					0x00000798 + +#define NVC0_3D_LOCAL_SIZE_LOW					0x0000079c + +#define NVC0_3D_RT(i0)					       (0x00000800 + 0x20*(i0)) +#define NVC0_3D_RT__ESIZE					0x00000020 +#define NVC0_3D_RT__LEN						0x00000008 + +#define NVC0_3D_RT_ADDRESS_HIGH(i0)			       (0x00000800 + 0x20*(i0)) + +#define NVC0_3D_RT_ADDRESS_LOW(i0)			       (0x00000804 + 0x20*(i0)) + +#define NVC0_3D_RT_HORIZ(i0)				       (0x00000808 + 0x20*(i0)) + +#define NVC0_3D_RT_VERT(i0)				       (0x0000080c + 0x20*(i0)) + +#define NVC0_3D_RT_FORMAT(i0)				       (0x00000810 + 0x20*(i0)) + +#define NVC0_3D_RT_TILE_MODE(i0)			       (0x00000814 + 0x20*(i0)) +#define NVC0_3D_RT_TILE_MODE_UNK0				0x00000001 +#define NVC0_3D_RT_TILE_MODE_Y__MASK				0x00000070 +#define NVC0_3D_RT_TILE_MODE_Y__SHIFT				4 +#define NVC0_3D_RT_TILE_MODE_Z__MASK				0x00000700 +#define NVC0_3D_RT_TILE_MODE_Z__SHIFT				8 + +#define NVC0_3D_RT_ARRAY_MODE(i0)			       (0x00000818 + 0x20*(i0)) +#define 
NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK			0x0000ffff +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT			0 +#define NVC0_3D_RT_ARRAY_MODE_VOLUME				0x00010000 + +#define NVC0_3D_RT_LAYER_STRIDE(i0)			       (0x0000081c + 0x20*(i0)) + +#define NVC0_3D_VIEWPORT_SCALE_X(i0)			       (0x00000a00 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE				0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_X__LEN				0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Y(i0)			       (0x00000a04 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE				0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Y__LEN				0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Z(i0)			       (0x00000a08 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE				0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Z__LEN				0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0)		       (0x00000a0c + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE			0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN			0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0)		       (0x00000a10 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE			0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN			0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0)		       (0x00000a14 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE			0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN			0x00000010 + +#define NVC0_3D_VIEWPORT_HORIZ(i0)			       (0x00000c00 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_HORIZ__ESIZE				0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ__LEN				0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ_X__MASK				0x0000ffff +#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT				0 +#define NVC0_3D_VIEWPORT_HORIZ_W__MASK				0xffff0000 +#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT				16 + +#define NVC0_3D_VIEWPORT_VERT(i0)			       (0x00000c04 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_VERT__ESIZE				0x00000010 +#define NVC0_3D_VIEWPORT_VERT__LEN				0x00000010 +#define NVC0_3D_VIEWPORT_VERT_Y__MASK				0x0000ffff +#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT				0 +#define 
NVC0_3D_VIEWPORT_VERT_H__MASK				0xffff0000 +#define NVC0_3D_VIEWPORT_VERT_H__SHIFT				16 + +#define NVC0_3D_DEPTH_RANGE_NEAR(i0)			       (0x00000c08 + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE				0x00000010 +#define NVC0_3D_DEPTH_RANGE_NEAR__LEN				0x00000010 + +#define NVC0_3D_DEPTH_RANGE_FAR(i0)			       (0x00000c0c + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE				0x00000010 +#define NVC0_3D_DEPTH_RANGE_FAR__LEN				0x00000010 + +#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0)			       (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE			0x00000008 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN			0x00000008 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK			0x0000ffff +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT			0 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK			0xffff0000 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT			16 + +#define NVC0_3D_VIEWPORT_CLIP_VERT(i0)			       (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE			0x00000008 +#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN				0x00000008 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK			0x0000ffff +#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT			0 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK			0xffff0000 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT			16 + +#define NVC0_3D_CLIPID_REGION_HORIZ(i0)			       (0x00000d40 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE			0x00000008 +#define NVC0_3D_CLIPID_REGION_HORIZ__LEN			0x00000004 +#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK			0x0000ffff +#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT			0 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK			0xffff0000 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT			16 + +#define NVC0_3D_CLIPID_REGION_VERT(i0)			       (0x00000d44 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_VERT__ESIZE			0x00000008 +#define NVC0_3D_CLIPID_REGION_VERT__LEN				0x00000004 +#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK			0x0000ffff +#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT			0 +#define 
NVC0_3D_CLIPID_REGION_VERT_H__MASK			0xffff0000 +#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT			16 + +#define NVC0_3D_VERTEX_BUFFER_FIRST				0x00000d74 + +#define NVC0_3D_VERTEX_BUFFER_COUNT				0x00000d78 + +#define NVC0_3D_CLEAR_COLOR(i0)				       (0x00000d80 + 0x4*(i0)) +#define NVC0_3D_CLEAR_COLOR__ESIZE				0x00000004 +#define NVC0_3D_CLEAR_COLOR__LEN				0x00000004 + +#define NVC0_3D_CLEAR_DEPTH					0x00000d90 + +#define NVC0_3D_CLEAR_STENCIL					0x00000da0 + +#define NVC0_3D_POLYGON_SMOOTH_ENABLE				0x00000db4 + +#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE			0x00000dc0 + +#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE			0x00000dc4 + +#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE			0x00000dc8 + +#define NVC0_3D_PATCH_VERTICES					0x00000dcc + +#define NVC0_3D_WINDOW_OFFSET_X					0x00000df8 + +#define NVC0_3D_WINDOW_OFFSET_Y					0x00000dfc + +#define NVC0_3D_SCISSOR_ENABLE(i0)			       (0x00000e00 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_ENABLE__ESIZE				0x00000010 +#define NVC0_3D_SCISSOR_ENABLE__LEN				0x00000010 + +#define NVC0_3D_SCISSOR_HORIZ(i0)			       (0x00000e04 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_HORIZ__ESIZE				0x00000010 +#define NVC0_3D_SCISSOR_HORIZ__LEN				0x00000010 +#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK				0x0000ffff +#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT			0 +#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK				0xffff0000 +#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT			16 + +#define NVC0_3D_SCISSOR_VERT(i0)			       (0x00000e08 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_VERT__ESIZE				0x00000010 +#define NVC0_3D_SCISSOR_VERT__LEN				0x00000010 +#define NVC0_3D_SCISSOR_VERT_MIN__MASK				0x0000ffff +#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT				0 +#define NVC0_3D_SCISSOR_VERT_MAX__MASK				0xffff0000 +#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT				16 + +#define NVC0_3D_STENCIL_BACK_FUNC_REF				0x00000f54 + +#define NVC0_3D_STENCIL_BACK_MASK				0x00000f58 + +#define NVC0_3D_STENCIL_BACK_FUNC_MASK				0x00000f5c + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH			0x00000f84 + 
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW			0x00000f88 + +#define NVC0_3D_DEPTH_BOUNDS(i0)			       (0x00000f9c + 0x4*(i0)) +#define NVC0_3D_DEPTH_BOUNDS__ESIZE				0x00000004 +#define NVC0_3D_DEPTH_BOUNDS__LEN				0x00000002 + +#define NVC0_3D_MSAA_MASK(i0)				       (0x00000fbc + 0x4*(i0)) +#define NVC0_3D_MSAA_MASK__ESIZE				0x00000004 +#define NVC0_3D_MSAA_MASK__LEN					0x00000004 + +#define NVC0_3D_CLIPID_ADDRESS_HIGH				0x00000fcc + +#define NVC0_3D_CLIPID_ADDRESS_LOW				0x00000fd0 + +#define NVC0_3D_ZETA_ADDRESS_HIGH				0x00000fe0 + +#define NVC0_3D_ZETA_ADDRESS_LOW				0x00000fe4 + +#define NVC0_3D_ZETA_FORMAT					0x00000fe8 + +#define NVC0_3D_ZETA_TILE_MODE					0x00000fec + +#define NVC0_3D_ZETA_LAYER_STRIDE				0x00000ff0 + +#define NVC0_3D_SCREEN_SCISSOR_HORIZ				0x00000ff4 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK			0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT			16 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK			0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT			0 + +#define NVC0_3D_SCREEN_SCISSOR_VERT				0x00000ff8 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK			0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT			16 +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK			0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT			0 + +#define NVC0_3D_VERTEX_ID					0x00001118 + +#define NVC0_3D_VTX_ATTR_DEFINE					0x0000114c +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK			0x000000ff +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT			0 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK			0x00000700 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT			8 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN			0x00000001 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX			0x00000004 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK			0x00007000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT			12 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8				0x00001000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16				0x00002000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32				0x00004000 +#define 
NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK			0x00070000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT			16 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM			0x00010000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM			0x00020000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT			0x00030000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT			0x00040000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED			0x00050000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED			0x00060000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT			0x00070000 + +#define NVC0_3D_VTX_ATTR_DATA(i0)			       (0x00001150 + 0x4*(i0)) +#define NVC0_3D_VTX_ATTR_DATA__ESIZE				0x00000004 +#define NVC0_3D_VTX_ATTR_DATA__LEN				0x00000004 + +#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0)		       (0x00001160 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE			0x00000004 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN			0x00000020 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK		0x0000003f +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT		0 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST			0x00000040 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK		0x001fff80 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT		7 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK			0x07e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT		21 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32		0x00200000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32		0x00400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16		0x00600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32			0x00800000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16		0x00a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8		0x01400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16			0x01e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32			0x02400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8			0x02600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8			0x03000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16			0x03600000 +#define 
NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8			0x03a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10		0x06000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK			0x78000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT		27 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM			0x08000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM			0x10000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT			0x18000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT			0x20000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED		0x28000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED		0x30000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT			0x38000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA			0x80000000 + +#define NVC0_3D_RT_CONTROL					0x0000121c +#define NVC0_3D_RT_CONTROL_COUNT__MASK				0x0000000f +#define NVC0_3D_RT_CONTROL_COUNT__SHIFT				0 +#define NVC0_3D_RT_CONTROL_MAP0__MASK				0x00000070 +#define NVC0_3D_RT_CONTROL_MAP0__SHIFT				4 +#define NVC0_3D_RT_CONTROL_MAP1__MASK				0x00000380 +#define NVC0_3D_RT_CONTROL_MAP1__SHIFT				7 +#define NVC0_3D_RT_CONTROL_MAP2__MASK				0x00001c00 +#define NVC0_3D_RT_CONTROL_MAP2__SHIFT				10 +#define NVC0_3D_RT_CONTROL_MAP3__MASK				0x0000e000 +#define NVC0_3D_RT_CONTROL_MAP3__SHIFT				13 +#define NVC0_3D_RT_CONTROL_MAP4__MASK				0x00070000 +#define NVC0_3D_RT_CONTROL_MAP4__SHIFT				16 +#define NVC0_3D_RT_CONTROL_MAP5__MASK				0x00380000 +#define NVC0_3D_RT_CONTROL_MAP5__SHIFT				19 +#define NVC0_3D_RT_CONTROL_MAP6__MASK				0x01c00000 +#define NVC0_3D_RT_CONTROL_MAP6__SHIFT				22 +#define NVC0_3D_RT_CONTROL_MAP7__MASK				0x0e000000 +#define NVC0_3D_RT_CONTROL_MAP7__SHIFT				25 + +#define NVC0_3D_ZETA_HORIZ					0x00001228 + +#define NVC0_3D_ZETA_VERT					0x0000122c + +#define NVC0_3D_ZETA_ARRAY_MODE					0x00001230 +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK			0x0000ffff +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT			0 +#define NVC0_3D_ZETA_ARRAY_MODE_UNK				0x00010000 + +#define NVC0_3D_LINKED_TSC					
0x00001234 + +#define NVC0_3D_FP_RESULT_COUNT					0x00001298 + +#define NVC0_3D_DEPTH_TEST_ENABLE				0x000012cc + +#define NVC0_3D_D3D_FILL_MODE					0x000012d0 +#define NVC0_3D_D3D_FILL_MODE_POINT				0x00000001 +#define NVC0_3D_D3D_FILL_MODE_WIREFRAME				0x00000002 +#define NVC0_3D_D3D_FILL_MODE_SOLID				0x00000003 + +#define NVC0_3D_SHADE_MODEL					0x000012d4 +#define NVC0_3D_SHADE_MODEL_FLAT				0x00001d00 +#define NVC0_3D_SHADE_MODEL_SMOOTH				0x00001d01 + +#define NVC0_3D_BLEND_INDEPENDENT				0x000012e4 + +#define NVC0_3D_DEPTH_WRITE_ENABLE				0x000012e8 + +#define NVC0_3D_ALPHA_TEST_ENABLE				0x000012ec + +#define NVC0_3D_VB_ELEMENT_U8_SETUP				0x00001300 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK		0xc0000000 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT		30 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK			0x3fffffff +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT		0 + +#define NVC0_3D_VB_ELEMENT_U8					0x00001304 +#define NVC0_3D_VB_ELEMENT_U8_I0__MASK				0x000000ff +#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT				0 +#define NVC0_3D_VB_ELEMENT_U8_I1__MASK				0x0000ff00 +#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT				8 +#define NVC0_3D_VB_ELEMENT_U8_I2__MASK				0x00ff0000 +#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT				16 +#define NVC0_3D_VB_ELEMENT_U8_I3__MASK				0xff000000 +#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT				24 + +#define NVC0_3D_D3D_CULL_MODE					0x00001308 +#define NVC0_3D_D3D_CULL_MODE_NONE				0x00000001 +#define NVC0_3D_D3D_CULL_MODE_FRONT				0x00000002 +#define NVC0_3D_D3D_CULL_MODE_BACK				0x00000003 + +#define NVC0_3D_DEPTH_TEST_FUNC					0x0000130c +#define NVC0_3D_DEPTH_TEST_FUNC_NEVER				0x00000200 +#define NVC0_3D_DEPTH_TEST_FUNC_LESS				0x00000201 +#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL				0x00000202 +#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL				0x00000203 +#define NVC0_3D_DEPTH_TEST_FUNC_GREATER				0x00000204 +#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL			0x00000205 +#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL				0x00000206 +#define 
NVC0_3D_DEPTH_TEST_FUNC_ALWAYS				0x00000207 + +#define NVC0_3D_ALPHA_TEST_REF					0x00001310 + +#define NVC0_3D_ALPHA_TEST_FUNC					0x00001314 +#define NVC0_3D_ALPHA_TEST_FUNC_NEVER				0x00000200 +#define NVC0_3D_ALPHA_TEST_FUNC_LESS				0x00000201 +#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL				0x00000202 +#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL				0x00000203 +#define NVC0_3D_ALPHA_TEST_FUNC_GREATER				0x00000204 +#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL			0x00000205 +#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL				0x00000206 +#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS				0x00000207 + +#define NVC0_3D_BLEND_COLOR(i0)				       (0x0000131c + 0x4*(i0)) +#define NVC0_3D_BLEND_COLOR__ESIZE				0x00000004 +#define NVC0_3D_BLEND_COLOR__LEN				0x00000004 + +#define NVC0_3D_TSC_FLUSH					0x00001330 +#define NVC0_3D_TSC_FLUSH_UNK0					0x00000001 +#define NVC0_3D_TSC_FLUSH_UNK1__MASK				0x03fffff0 +#define NVC0_3D_TSC_FLUSH_UNK1__SHIFT				4 + +#define NVC0_3D_TIC_FLUSH					0x00001334 +#define NVC0_3D_TIC_FLUSH_UNK0					0x00000001 +#define NVC0_3D_TIC_FLUSH_UNK1__MASK				0x03fffff0 +#define NVC0_3D_TIC_FLUSH_UNK1__SHIFT				4 + +#define NVC0_3D_TEX_CACHE_CTL					0x00001338 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK			0x00000030 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT			4 + +#define NVC0_3D_BLEND_EQUATION_RGB				0x00001340 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD			0x00008006 +#define NVC0_3D_BLEND_EQUATION_RGB_MIN				0x00008007 +#define NVC0_3D_BLEND_EQUATION_RGB_MAX				0x00008008 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT		0x0000800a +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT	0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_RGB				0x00001344 + +#define NVC0_3D_BLEND_FUNC_DST_RGB				0x00001348 + +#define NVC0_3D_BLEND_EQUATION_ALPHA				0x0000134c +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD			0x00008006 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN			0x00008007 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX			0x00008008 +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT		
0x0000800a +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT	0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_ALPHA				0x00001350 + +#define NVC0_3D_BLEND_FUNC_DST_ALPHA				0x00001358 + +#define NVC0_3D_BLEND_ENABLE(i0)			       (0x00001360 + 0x4*(i0)) +#define NVC0_3D_BLEND_ENABLE__ESIZE				0x00000004 +#define NVC0_3D_BLEND_ENABLE__LEN				0x00000008 + +#define NVC0_3D_STENCIL_FRONT_ENABLE				0x00001380 + +#define NVC0_3D_STENCIL_FRONT_OP_FAIL				0x00001384 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO			0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT			0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP			0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE			0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR			0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR			0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP			0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP			0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL				0x00001388 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO			0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT			0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP			0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE			0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR			0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR			0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP		0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP		0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS				0x0000138c +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO			0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT			0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP			0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE			0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR			0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR			0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP		0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP		
0x00008508 + +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC				0x00001390 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER			0x00000200 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS			0x00000201 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL			0x00000202 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL			0x00000203 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER			0x00000204 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL		0x00000205 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL			0x00000206 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS			0x00000207 + +#define NVC0_3D_STENCIL_FRONT_FUNC_REF				0x00001394 + +#define NVC0_3D_STENCIL_FRONT_MASK				0x00001398 + +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK				0x0000139c + +#define NVC0_3D_FRAG_COLOR_CLAMP_EN				0x000013a8 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0				0x00000001 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1				0x00000010 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2				0x00000100 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3				0x00001000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4				0x00010000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5				0x00100000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6				0x01000000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7				0x10000000 + +#define NVC0_3D_Y_ORIGIN_BOTTOM					0x000013ac + +#define NVC0_3D_LINE_WIDTH					0x000013b0 + +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT				0x00001420 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN			0x00000001 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX			0x00000400 + +#define NVC0_3D_FENCE_UNK					0x0000142c + +#define NVC0_3D_VB_ELEMENT_BASE					0x00001434 + +#define NVC0_3D_VB_INSTANCE_BASE				0x00001438 + +#define NVC0_3D_CODE_CB_FLUSH					0x00001440 + +#define NVC0_3D_CLIPID_HEIGHT					0x00001504 +#define NVC0_3D_CLIPID_HEIGHT__MAX				0x00002000 + +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE				0x00001510 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0			0x00000001 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1			0x00000002 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2			0x00000004 +#define 
NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3			0x00000008 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4			0x00000010 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5			0x00000020 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6			0x00000040 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7			0x00000080 + +#define NVC0_3D_SAMPLECNT_ENABLE				0x00001514 + +#define NVC0_3D_POINT_SIZE					0x00001518 + +#define NVC0_3D_POINT_SPRITE_ENABLE				0x00001520 + +#define NVC0_3D_SAMPLECNT_RESET					0x00001530 + +#define NVC0_3D_MULTISAMPLE_ZETA_ENABLE				0x00001534 + +#define NVC0_3D_ZETA_ENABLE					0x00001538 + +#define NVC0_3D_MULTISAMPLE_CTRL				0x0000153c +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE		0x00000001 +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE			0x00000010 + +#define NVC0_3D_COND_ADDRESS_HIGH				0x00001550 + +#define NVC0_3D_COND_ADDRESS_LOW				0x00001554 + +#define NVC0_3D_COND_MODE					0x00001558 +#define NVC0_3D_COND_MODE_NEVER					0x00000000 +#define NVC0_3D_COND_MODE_ALWAYS				0x00000001 +#define NVC0_3D_COND_MODE_RES_NON_ZERO				0x00000002 +#define NVC0_3D_COND_MODE_EQUAL					0x00000003 +#define NVC0_3D_COND_MODE_NOT_EQUAL				0x00000004 + +#define NVC0_3D_TSC_ADDRESS_HIGH				0x0000155c + +#define NVC0_3D_TSC_ADDRESS_LOW					0x00001560 +#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN				0x00000020 + +#define NVC0_3D_TSC_LIMIT					0x00001564 +#define NVC0_3D_TSC_LIMIT__MAX					0x00001fff + +#define NVC0_3D_POLYGON_OFFSET_FACTOR				0x0000156c + +#define NVC0_3D_LINE_SMOOTH_ENABLE				0x00001570 + +#define NVC0_3D_TIC_ADDRESS_HIGH				0x00001574 + +#define NVC0_3D_TIC_ADDRESS_LOW					0x00001578 + +#define NVC0_3D_TIC_LIMIT					0x0000157c + +#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE				0x00001594 + +#define NVC0_3D_STENCIL_BACK_OP_FAIL				0x00001598 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO			0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT			0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP			0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE			0x00001e01 +#define 
NVC0_3D_STENCIL_BACK_OP_FAIL_INCR			0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR			0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP			0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP			0x00008508 + +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL				0x0000159c +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO			0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT			0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP			0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE			0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR			0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR			0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP			0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP			0x00008508 + +#define NVC0_3D_STENCIL_BACK_OP_ZPASS				0x000015a0 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO			0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT			0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP			0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE			0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR			0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR			0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP			0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP			0x00008508 + +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC				0x000015a4 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER			0x00000200 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS			0x00000201 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL			0x00000202 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL			0x00000203 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER			0x00000204 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL			0x00000205 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL			0x00000206 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS			0x00000207 + +#define NVC0_3D_MULTISAMPLE_COLOR_ENABLE			0x000015b4 + +#define NVC0_3D_FRAMEBUFFER_SRGB				0x000015b8 + +#define NVC0_3D_POLYGON_OFFSET_UNITS				0x000015bc + +#define 
NVC0_3D_GP_BUILTIN_RESULT_EN				0x000015cc +#define NVC0_3D_GP_BUILTIN_RESULT_EN_LAYER			0x00010000 + +#define NVC0_3D_MULTISAMPLE_MODE				0x000015d0 +#define NVC0_3D_MULTISAMPLE_MODE_1X				0x00000000 +#define NVC0_3D_MULTISAMPLE_MODE_2XMS				0x00000001 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS				0x00000002 +#define NVC0_3D_MULTISAMPLE_MODE_8XMS				0x00000003 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS			0x00000008 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS			0x00000009 +#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS			0x0000000a + +#define NVC0_3D_VERTEX_BEGIN_D3D				0x000015d4 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK		0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT		0 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS		0x00000001 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES		0x00000002 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP		0x00000003 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES		0x00000004 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP	0x00000005 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY	0x0000000a +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY	0x0000000b +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY	0x0000000c +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY	0x0000000d +#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT			0x10000000 + +#define NVC0_3D_VERTEX_END_D3D					0x000015d8 +#define NVC0_3D_VERTEX_END_D3D_UNK0				0x00000001 +#define NVC0_3D_VERTEX_END_D3D_UNK1				0x00000002 + +#define NVC0_3D_EDGEFLAG_ENABLE					0x000015e4 + +#define NVC0_3D_VB_ELEMENT_U32					0x000015e8 + +#define NVC0_3D_VB_ELEMENT_U16_SETUP				0x000015ec +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK		0xc0000000 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT		30 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK		0x3fffffff +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT		0 + +#define NVC0_3D_VB_ELEMENT_U16					0x000015f0 +#define 
NVC0_3D_VB_ELEMENT_U16_I0__MASK				0x0000ffff +#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT			0 +#define NVC0_3D_VB_ELEMENT_U16_I1__MASK				0xffff0000 +#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT			16 + +#define NVC0_3D_VERTEX_BASE_HIGH				0x000015f4 + +#define NVC0_3D_VERTEX_BASE_LOW					0x000015f8 + +#define NVC0_3D_POINT_COORD_REPLACE				0x00001604 +#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK			0x00001fff +#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT			0 + +#define NVC0_3D_CODE_ADDRESS_HIGH				0x00001608 + +#define NVC0_3D_CODE_ADDRESS_LOW				0x0000160c + +#define NVC0_3D_VERTEX_END_GL					0x00001614 +#define NVC0_3D_VERTEX_END_GL_UNK0				0x00000001 +#define NVC0_3D_VERTEX_END_GL_UNK1				0x00000002 + +#define NVC0_3D_VERTEX_BEGIN_GL					0x00001618 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK			0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT		0 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS		0x00000000 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES			0x00000001 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP		0x00000002 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP		0x00000003 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES		0x00000004 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP	0x00000005 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN		0x00000006 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS			0x00000007 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP		0x00000008 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON		0x00000009 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY	0x0000000a +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY	0x0000000b +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY	0x0000000c +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY	0x0000000d +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES		0x0000000e +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT			0x10000000 + +#define NVC0_3D_VERTEX_DATA					0x00001640 + 
+#define NVC0_3D_PRIM_RESTART_ENABLE				0x00001644 + +#define NVC0_3D_PRIM_RESTART_INDEX				0x00001648 + +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN				0x0000164c +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID			0x00000001 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID		0x00000010 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID		0x00000100 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12			0x00001000 + +#define NVC0_3D_POINT_SMOOTH_ENABLE				0x00001658 + +#define NVC0_3D_POINT_SPRITE_CTRL				0x00001660 + +#define NVC0_3D_TEX_MISC					0x00001664 +#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP			0x00000004 + +#define NVC0_3D_LINE_STIPPLE_ENABLE				0x0000166c + +#define NVC0_3D_LINE_STIPPLE_PATTERN				0x00001680 + +#define NVC0_3D_PROVOKING_VERTEX_LAST				0x00001684 + +#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE				0x00001688 + +#define NVC0_3D_POLYGON_STIPPLE_ENABLE				0x0000168c + +#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0)		       (0x00001700 + 0x4*(i0)) +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE			0x00000004 +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN			0x00000020 + +#define NVC0_3D_STRMOUT_UNK1780(i0)			       (0x00001780 + 0x4*(i0)) +#define NVC0_3D_STRMOUT_UNK1780__ESIZE				0x00000004 +#define NVC0_3D_STRMOUT_UNK1780__LEN				0x00000004 + +#define NVC0_3D_UNK17BC_ADDRESS_HIGH				0x000017bc + +#define NVC0_3D_UNK17BC_ADDRESS_LOW				0x000017c0 + +#define NVC0_3D_UNK17BC_LIMIT					0x000017c4 + +#define NVC0_3D_INDEX_ARRAY_START_HIGH				0x000017c8 + +#define NVC0_3D_INDEX_ARRAY_START_LOW				0x000017cc + +#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH				0x000017d0 + +#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW				0x000017d4 + +#define NVC0_3D_INDEX_LOG2_SIZE					0x000017d8 + +#define NVC0_3D_INDEX_BATCH_FIRST				0x000017dc + +#define NVC0_3D_INDEX_BATCH_COUNT				0x000017e0 + +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0)		       (0x00001880 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE		0x00000004 +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN			0x00000020 + 
+#define NVC0_3D_VP_POINT_SIZE_EN				0x00001910 + +#define NVC0_3D_CULL_FACE_ENABLE				0x00001918 + +#define NVC0_3D_FRONT_FACE					0x0000191c +#define NVC0_3D_FRONT_FACE_CW					0x00000900 +#define NVC0_3D_FRONT_FACE_CCW					0x00000901 + +#define NVC0_3D_CULL_FACE					0x00001920 +#define NVC0_3D_CULL_FACE_FRONT					0x00000404 +#define NVC0_3D_CULL_FACE_BACK					0x00000405 +#define NVC0_3D_CULL_FACE_FRONT_AND_BACK			0x00000408 + +#define NVC0_3D_VIEWPORT_TRANSFORM_EN				0x0000192c + +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL				0x0000193c +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0			0x00000001 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1			0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2			0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3			0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4			0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7			0x00000080 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10			0x00000400 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11			0x00000800 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12			0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13			0x00002000 + +#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN				0x0000194c + +#define NVC0_3D_VIEWPORT_CLIP_MODE				0x00001950 +#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY			0x00000000 +#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL			0x00000001 +#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER			0x00000002 + +#define NVC0_3D_FP_ZORDER_CTRL					0x0000196c +#define NVC0_3D_FP_ZORDER_CTRL_0				0x00000001 +#define NVC0_3D_FP_ZORDER_CTRL_1				0x00000010 + +#define NVC0_3D_CLIPID_ENABLE					0x0000197c + +#define NVC0_3D_CLIPID_WIDTH					0x00001980 +#define NVC0_3D_CLIPID_WIDTH__MAX				0x00002000 +#define NVC0_3D_CLIPID_WIDTH__ALIGN				0x00000040 + +#define NVC0_3D_CLIPID_ID					0x00001984 + +#define NVC0_3D_FP_CONTROL					0x000019a8 +#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS			0x00000001 +#define NVC0_3D_FP_CONTROL_EXPORTS_Z				0x00000100 +#define NVC0_3D_FP_CONTROL_USES_KIL				0x00100000 + +#define 
NVC0_3D_DEPTH_BOUNDS_EN					0x000019bc + +#define NVC0_3D_LOGIC_OP_ENABLE					0x000019c4 + +#define NVC0_3D_LOGIC_OP					0x000019c8 +#define NVC0_3D_LOGIC_OP_CLEAR					0x00001500 +#define NVC0_3D_LOGIC_OP_AND					0x00001501 +#define NVC0_3D_LOGIC_OP_AND_REVERSE				0x00001502 +#define NVC0_3D_LOGIC_OP_COPY					0x00001503 +#define NVC0_3D_LOGIC_OP_AND_INVERTED				0x00001504 +#define NVC0_3D_LOGIC_OP_NOOP					0x00001505 +#define NVC0_3D_LOGIC_OP_XOR					0x00001506 +#define NVC0_3D_LOGIC_OP_OR					0x00001507 +#define NVC0_3D_LOGIC_OP_NOR					0x00001508 +#define NVC0_3D_LOGIC_OP_EQUIV					0x00001509 +#define NVC0_3D_LOGIC_OP_INVERT					0x0000150a +#define NVC0_3D_LOGIC_OP_OR_REVERSE				0x0000150b +#define NVC0_3D_LOGIC_OP_COPY_INVERTED				0x0000150c +#define NVC0_3D_LOGIC_OP_OR_INVERTED				0x0000150d +#define NVC0_3D_LOGIC_OP_NAND					0x0000150e +#define NVC0_3D_LOGIC_OP_SET					0x0000150f + +#define NVC0_3D_CLEAR_BUFFERS					0x000019d0 +#define NVC0_3D_CLEAR_BUFFERS_Z					0x00000001 +#define NVC0_3D_CLEAR_BUFFERS_S					0x00000002 +#define NVC0_3D_CLEAR_BUFFERS_R					0x00000004 +#define NVC0_3D_CLEAR_BUFFERS_G					0x00000008 +#define NVC0_3D_CLEAR_BUFFERS_B					0x00000010 +#define NVC0_3D_CLEAR_BUFFERS_A					0x00000020 +#define NVC0_3D_CLEAR_BUFFERS_RT__MASK				0x000003c0 +#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT				6 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK			0x001ffc00 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT			10 + +#define NVC0_3D_COLOR_MASK(i0)				       (0x00001a00 + 0x4*(i0)) +#define NVC0_3D_COLOR_MASK__ESIZE				0x00000004 +#define NVC0_3D_COLOR_MASK__LEN					0x00000008 +#define NVC0_3D_COLOR_MASK_R					0x0000000f +#define NVC0_3D_COLOR_MASK_G					0x000000f0 +#define NVC0_3D_COLOR_MASK_B					0x00000f00 +#define NVC0_3D_COLOR_MASK_A					0x0000f000 + +#define NVC0_3D_QUERY_ADDRESS_HIGH				0x00001b00 + +#define NVC0_3D_QUERY_ADDRESS_LOW				0x00001b04 + +#define NVC0_3D_QUERY_SEQUENCE					0x00001b08 + +#define NVC0_3D_QUERY_GET					0x00001b0c +#define 
NVC0_3D_QUERY_GET_FENCE					0x1000f010 +#define NVC0_3D_QUERY_GET_SAMPLE_COUNT				0x0100f002 +#define NVC0_3D_QUERY_GET_TFB					0x05805002 +#define NVC0_3D_QUERY_GET_GENERATED_PRIMS			0x06805002 +#define NVC0_3D_QUERY_GET_UNK00005002				0x00005002 + +#define NVC0_3D_VERTEX_ARRAY_FETCH(i0)			       (0x00001c00 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE			0x00000010 +#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN				0x00000020 +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK			0x00000fff +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT		0 +#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE			0x00001000 + +#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0)		       (0x00001c0c + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE			0x00000010 +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN			0x00000020 + +#define NVC0_3D_IBLEND(i0)				       (0x00001e00 + 0x20*(i0)) +#define NVC0_3D_IBLEND__ESIZE					0x00000020 +#define NVC0_3D_IBLEND__LEN					0x00000008 + +#define NVC0_3D_IBLEND_EQUATION_RGB(i0)			       (0x00001e04 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD			0x00008006 +#define NVC0_3D_IBLEND_EQUATION_RGB_MIN				0x00008007 +#define NVC0_3D_IBLEND_EQUATION_RGB_MAX				0x00008008 +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT		0x0000800a +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT	0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0)			       (0x00001e08 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0)			       (0x00001e0c + 0x20*(i0)) + +#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0)		       (0x00001e10 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD			0x00008006 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN			0x00008007 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX			0x00008008 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT		0x0000800a +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT	0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0)		       (0x00001e14 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0)		   
    (0x00001e18 + 0x20*(i0)) + +#define NVC0_3D_SP(i0)					       (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP__ESIZE					0x00000040 +#define NVC0_3D_SP__LEN						0x00000006 + +#define NVC0_3D_SP_SELECT(i0)				       (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP_SELECT_ENABLE				0x00000001 +#define NVC0_3D_SP_SELECT_PROGRAM__MASK				0x00000070 +#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT			4 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_A				0x00000000 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_B				0x00000010 +#define NVC0_3D_SP_SELECT_PROGRAM_TCP				0x00000020 +#define NVC0_3D_SP_SELECT_PROGRAM_TEP				0x00000030 +#define NVC0_3D_SP_SELECT_PROGRAM_GP				0x00000040 +#define NVC0_3D_SP_SELECT_PROGRAM_FP				0x00000050 + +#define NVC0_3D_SP_START_ID(i0)				       (0x00002004 + 0x40*(i0)) + +#define NVC0_3D_SP_GPR_ALLOC(i0)			       (0x0000200c + 0x40*(i0)) + +#define NVC0_3D_TEX_LIMITS(i0)				       (0x00002200 + 0x10*(i0)) +#define NVC0_3D_TEX_LIMITS__ESIZE				0x00000010 +#define NVC0_3D_TEX_LIMITS__LEN					0x00000005 + +#define NVC0_3D_CB_SIZE						0x00002380 + +#define NVC0_3D_CB_ADDRESS_HIGH					0x00002384 + +#define NVC0_3D_CB_ADDRESS_LOW					0x00002388 + +#define NVC0_3D_CB_POS						0x0000238c + +#define NVC0_3D_CB_DATA(i0)				       (0x00002390 + 0x4*(i0)) +#define NVC0_3D_CB_DATA__ESIZE					0x00000004 +#define NVC0_3D_CB_DATA__LEN					0x00000010 + +#define NVC0_3D_BIND_TSC(i0)				       (0x00002400 + 0x20*(i0)) +#define NVC0_3D_BIND_TSC__ESIZE					0x00000020 +#define NVC0_3D_BIND_TSC__LEN					0x00000005 +#define NVC0_3D_BIND_TSC_ACTIVE					0x00000001 +#define NVC0_3D_BIND_TSC_SAMPLER__MASK				0x00000ff0 +#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT				4 +#define NVC0_3D_BIND_TSC_TSC__MASK				0x01fff000 +#define NVC0_3D_BIND_TSC_TSC__SHIFT				12 + +#define NVC0_3D_BIND_TIC(i0)				       (0x00002404 + 0x20*(i0)) +#define NVC0_3D_BIND_TIC__ESIZE					0x00000020 +#define NVC0_3D_BIND_TIC__LEN					0x00000005 +#define NVC0_3D_BIND_TIC_ACTIVE					0x00000001 +#define 
NVC0_3D_BIND_TIC_TEXTURE__MASK				0x000001fe +#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT				1 +#define NVC0_3D_BIND_TIC_TIC__MASK				0x7ffffe00 +#define NVC0_3D_BIND_TIC_TIC__SHIFT				9 + +#define NVC0_3D_CB_BIND(i0)				       (0x00002410 + 0x20*(i0)) +#define NVC0_3D_CB_BIND__ESIZE					0x00000020 +#define NVC0_3D_CB_BIND__LEN					0x00000005 +#define NVC0_3D_CB_BIND_VALID					0x00000001 +#define NVC0_3D_CB_BIND_INDEX__MASK				0x000000f0 +#define NVC0_3D_CB_BIND_INDEX__SHIFT				4 + +#define NVC0_3D_VERT_COLOR_CLAMP_EN				0x00002600 + +#define NVC0_3D_TFB_VARYING_LOCS(i0)			       (0x00002800 + 0x4*(i0)) +#define NVC0_3D_TFB_VARYING_LOCS__ESIZE				0x00000004 +#define NVC0_3D_TFB_VARYING_LOCS__LEN				0x00000080 + +#define NVC0_3D_COLOR_MASK_BROADCAST				0x00003808 + +#define NVC0_3D_VERTEX_ARRAY_SELECT				0x00003820 + +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH				0x00003824 + +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW				0x00003828 + +#define NVC0_3D_VERTEX_ARRAY_START_HIGH				0x0000382c + +#define NVC0_3D_VERTEX_ARRAY_START_LOW				0x00003830 + +#define NVC0_3D_BLEND_ENABLES					0x00003858 + +#define NVC0_3D_POLYGON_MODE_FRONT				0x00003868 +#define NVC0_3D_POLYGON_MODE_FRONT_POINT			0x00001b00 +#define NVC0_3D_POLYGON_MODE_FRONT_LINE				0x00001b01 +#define NVC0_3D_POLYGON_MODE_FRONT_FILL				0x00001b02 + +#define NVC0_3D_POLYGON_MODE_BACK				0x00003870 +#define NVC0_3D_POLYGON_MODE_BACK_POINT				0x00001b00 +#define NVC0_3D_POLYGON_MODE_BACK_LINE				0x00001b01 +#define NVC0_3D_POLYGON_MODE_BACK_FILL				0x00001b02 + +#define NVC0_3D_GP_SELECT					0x00003878 + +#define NVC0_3D_TEP_SELECT					0x00003880 + + +#endif /* NVC0_3D_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h new file mode 100644 index 0000000000..84b152213a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h @@ -0,0 +1,98 @@ +#ifndef NV_3DDEFS_XML +#define NV_3DDEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml    (  26312 bytes, from 2010-10-08 10:10:01) +- copyright.xml  (   6498 bytes, from 2010-10-03 13:18:37) +- nv_defs.xml    (   4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml  (  16397 bytes, from 2010-10-08 13:30:38) +- nv_object.xml  (  11249 bytes, from 2010-10-07 15:31:28) +- nvchipsets.xml (   2824 bytes, from 2010-07-07 13:41:20) +- nv50_defs.xml  (   4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi 
Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/


#define NV50_3D_BLEND_FACTOR_ZERO				0x00004000
#define NV50_3D_BLEND_FACTOR_ONE				0x00004001
#define NV50_3D_BLEND_FACTOR_SRC_COLOR				0x00004300
#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR		0x00004301
#define NV50_3D_BLEND_FACTOR_SRC_ALPHA				0x00004302
#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA		0x00004303
#define NV50_3D_BLEND_FACTOR_DST_ALPHA				0x00004304
#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA		0x00004305
#define NV50_3D_BLEND_FACTOR_DST_COLOR				0x00004306
#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR		0x00004307
#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE			0x00004308
#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR			0x0000c001
#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR		0x0000c002
#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA			0x0000c003
#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA		0x0000c004
#define NV50_3D_BLEND_FACTOR_SRC1_COLOR				0x0000c900
#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR		0x0000c901
#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA				0x0000c902
#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA		0x0000c903

#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
new file mode 100644
index 0000000000..93d7f5d303
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_buffer.c
@@ -0,0 +1,297 @@

#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"

/* While NOUVEAU_NVC0 is defined, the shared nouveau headers leave out their
 * RING_3D()/RING_3D_NI() helpers and the nouveau_pushbuf.h include.
 */
#define NOUVEAU_NVC0
#include "nouveau/nouveau_screen.h"
#include "nouveau/nouveau_winsys.h"
#undef NOUVEAU_NVC0

#include "nvc0_context.h"
#include "nvc0_resource.h"

/* Value of nvc0_resource::status for buffers wrapping caller-owned memory
 * (set by nvc0_user_buffer_create); nvc0_buffer_destroy must not free their
 * data pointer.
 */
#define NVC0_BUFFER_STATUS_USER_MEMORY 0xff

/* Give buf backing storage of buf->base.width0 bytes in the given domain.
 *
 *  NOUVEAU_BO_VRAM: sub-allocate from screen->mm_VRAM; if that yields no bo,
 *                   retry with NOUVEAU_BO_GART.
 *  NOUVEAU_BO_GART: sub-allocate from screen->mm_GART.
 *  0:               malloc buf->data, unless it is already set.
 *
 * On success buf->domain records the domain that was actually used (GART
 * when the VRAM attempt fell back) and TRUE is returned; FALSE otherwise.
 */
static INLINE boolean
nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
                     unsigned domain)
{
   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo,
                                 &buf->offset);
      /* no VRAM available: the recursive call sets buf->domain to GART */
      if (!buf->bo)
         return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo,
                                 &buf->offset);
      if (!buf->bo)
         return FALSE;
   } else {
      /* the only other supported domain is 0, i.e. plain malloc'd memory */
      assert(!domain);
      if (!buf->data)
         buf->data = MALLOC(buf->base.width0);
      if (!buf->data)
         return FALSE;
   }
   buf->domain = domain;
   return TRUE;
}

/* Hand the sub-allocation *mm over to fence: push it onto the head of
 * fence->buffers and clear the caller's pointer.
 * NOTE(review): presumably the allocation is really freed once the fence has
 * signalled; that code is not visible here.
 */
static INLINE void
release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence)
{
   (*mm)->next = fence->buffers;
   fence->buffers = (*mm);
   (*mm) = NULL;
}

/* resource_destroy hook: drop the bo reference, queue the sub-allocation (if
 * any) on the screen's current fence, free driver-owned system memory and
 * finally the resource itself.
 */
static void
nvc0_buffer_destroy(struct pipe_screen *pscreen,
                    struct pipe_resource *presource)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   struct nvc0_resource *res = nvc0_resource(presource);

   nouveau_bo_ref(NULL, &res->bo);

   if (res->mm)
      release_allocation(&res->mm, screen->fence.current);

   /* user buffers only borrow their data pointer, so it is not ours to free */
   if (res->status != NVC0_BUFFER_STATUS_USER_MEMORY && res->data)
      FREE(res->data);

   FREE(res);
}

/* Translate PIPE_TRANSFER_READ/WRITE usage bits into NOUVEAU_BO_RD/WR. */
static INLINE uint32_t
nouveau_buffer_rw_flags(unsigned pipe)
{
   uint32_t flags = 0;

   if (pipe & PIPE_TRANSFER_READ)
      flags = NOUVEAU_BO_RD;
   if (pipe & PIPE_TRANSFER_WRITE)
      flags |= NOUVEAU_BO_WR;

   return flags;
}

/* transfer_map hook: return a CPU pointer to byte transfer->box.x of the
 * buffer, or NULL if the buffer lives in VRAM or the bo cannot be mapped.
 */
static void *
nvc0_buffer_transfer_map(struct pipe_context *pipe,
                         struct pipe_transfer *transfer)
{
   struct nvc0_resource *res = nvc0_resource(transfer->resource);
   struct nvc0_fence *fence;
   uint8_t *map;
   int ret;
   uint32_t flags = nouveau_buffer_rw_flags(transfer->usage);

   /* a CPU write to a vertex/index buffer flags the context's vbo_dirty */
   if ((res->base.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) &&
       (flags & NOUVEAU_BO_WR))
      nvc0_context(pipe)->vbo_dirty = TRUE;

   /* domain 0: contents are in system memory, no bo involved */
   if (res->domain == 0)
      return res->data + transfer->box.x;

   if (res->domain == NOUVEAU_BO_VRAM) {
      NOUVEAU_ERR("transfers to/from VRAM buffers are not allowed\n");
      /* if this happens, migrate back to GART */
      return NULL;
   }

   /* NOTE(review): score is decremented per map and floored at -1024; it
    * presumably feeds a placement heuristic elsewhere - confirm. */
   if (res->score > -1024)
      --res->score;

   /* The bo is mapped only to obtain bo->map and unmapped right away; the
    * pointer is still handed out below.
    * NOTE(review): relies on the mapping staying valid after
    * nouveau_bo_unmap - confirm. */
   ret = nouveau_bo_map(res->bo, flags | NOUVEAU_BO_NOSYNC);
   if (ret)
      return NULL;
   map = res->bo->map;
   nouveau_bo_unmap(res->bo);

   /* NOSYNC was used above, so wait by hand: read-only maps wait on
    * fence_wr, all others on fence */
   fence = (flags == NOUVEAU_BO_RD) ? res->fence_wr : res->fence;

   if (fence) {
      if (nvc0_fence_wait(fence) == FALSE)
         NOUVEAU_ERR("failed to fence buffer\n");

      /* both fence references are dropped, whichever one was waited on */
      nvc0_fence_reference(&res->fence, NULL);
      nvc0_fence_reference(&res->fence_wr, NULL);
   }

   /* the buffer is a sub-allocation starting res->offset bytes into the bo */
   return map + transfer->box.x + res->offset;
}



/* transfer_flush_region hook: flush box (relative to the transfer's own box)
 * of the underlying bo. Does nothing for buffers without a bo.
 */
static void
nvc0_buffer_transfer_flush_region(struct pipe_context *pipe,
                                  struct pipe_transfer *transfer,
                                  const struct pipe_box *box)
{
   struct nvc0_resource *res = nvc0_resource(transfer->resource);

   if (!res->bo)
      return;

   nouveau_screen_bo_map_flush_range(pipe->screen,
                                     res->bo,
                                     res->offset + transfer->box.x + box->x,
                                     box->width);
}

/* transfer_unmap hook: currently a no-op on every path; the bo was already
 * unmapped inside nvc0_buffer_transfer_map.
 */
static void
nvc0_buffer_transfer_unmap(struct pipe_context *pipe,
                           struct pipe_transfer *transfer)
{
   struct nvc0_resource *res = nvc0_resource(transfer->resource);

   if (res->data)
      return;

   /* nouveau_screen_bo_unmap(pipe->screen, res->bo); */
}

/* Resource vtable installed on every buffer created in this file. */
const struct u_resource_vtbl nvc0_buffer_vtbl =
{
   u_default_resource_get_handle,     /* get_handle */
   nvc0_buffer_destroy,               /* resource_destroy */
   NULL,                              /* is_resource_referenced */
   u_default_get_transfer,            /* get_transfer */
   u_default_transfer_destroy,        /* transfer_destroy */
   nvc0_buffer_transfer_map,          /* transfer_map */
   nvc0_buffer_transfer_flush_region, /* transfer_flush_region */
   nvc0_buffer_transfer_unmap,        /* transfer_unmap */
   u_default_transfer_inline_write    /* transfer_inline_write */
};

/* Create a buffer resource from templ. Constant buffers are backed by
 * malloc'd system memory (domain 0), all other buffers by GART.
 * Returns NULL if the resource or its storage cannot be allocated.
 */
struct pipe_resource *
nvc0_buffer_create(struct pipe_screen *pscreen,
                   const struct pipe_resource *templ)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   struct nvc0_resource *buffer;
   boolean ret;

   buffer = CALLOC_STRUCT(nvc0_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   buffer->vtbl = &nvc0_buffer_vtbl;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER)
      ret = nvc0_buffer_allocate(screen, buffer, 0);
   else
      ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART);

   if (ret == FALSE)
      goto fail;

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}


/* Wrap bytes bytes of caller-owned memory at ptr in a buffer resource without
 * copying. The status marker keeps nvc0_buffer_destroy from freeing ptr.
 * Returns NULL if the resource struct cannot be allocated.
 */
struct pipe_resource *
nvc0_user_buffer_create(struct pipe_screen *pscreen,
                        void *ptr,
                        unsigned bytes,
                        unsigned bind)
{
   struct nvc0_resource *buffer;

   buffer = CALLOC_STRUCT(nvc0_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->vtbl = &nvc0_buffer_vtbl;
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY;

   return &buffer->base;
}

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM.
*/
/* Supported transitions, selected by (buf->domain, domain):
 *   0    -> GART : allocate GART storage and memcpy buf->data into it
 *   GART -> VRAM : allocate new storage and copy on the GPU (M2MF)
 *   0    -> VRAM : the two steps above, chained
 *
 * Returns TRUE on success. Returns FALSE if the transition is not supported
 * or if allocation/mapping failed; in the failure cases the buffer keeps the
 * storage and domain it had on entry (for 0 -> VRAM it may be left in GART if
 * only the second step failed).
 *
 * nvc0_buffer_allocate records in buf->domain the domain it actually used, so
 * after a request for VRAM the buffer may legitimately report GART.
 */
boolean
nvc0_buffer_migrate(struct nvc0_context *nvc0,
                    struct nvc0_resource *buf, unsigned domain)
{
   struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
   const unsigned size = buf->base.width0;

   if (domain == NOUVEAU_BO_GART && buf->domain == 0) {
      if (!nvc0_buffer_allocate(screen, buf, domain))
         return FALSE;

      if (nouveau_bo_map(buf->bo, NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC)) {
         /* Undo the allocation, otherwise the buffer would be marked as GART
          * while its contents were never uploaded. Same release sequence as
          * nvc0_buffer_destroy. */
         nouveau_bo_ref(NULL, &buf->bo);
         if (buf->mm)
            release_allocation(&buf->mm, screen->fence.current);
         buf->domain = 0;
         return FALSE; /* was "return ret": non-zero, i.e. read as success */
      }
      /* the buffer is a sub-allocation starting buf->offset into the bo */
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      nouveau_bo_unmap(buf->bo);
      return TRUE;
   }

   if (domain == NOUVEAU_BO_VRAM && buf->domain == NOUVEAU_BO_GART) {
      struct nvc0_mm_allocation *old_mm = buf->mm;
      struct nouveau_bo *old_bo = buf->bo;
      const uint32_t old_offset = buf->offset;

      buf->bo = NULL;
      buf->mm = NULL;
      if (!nvc0_buffer_allocate(screen, buf, domain)) {
         /* nothing new was obtained: put the GART storage back */
         buf->bo = old_bo;
         buf->mm = old_mm;
         buf->offset = old_offset;
         return FALSE;
      }

      /* Source and destination are both sub-allocations, so each side needs
       * its own offset. The destination domain is whatever the allocator
       * really handed out (it falls back to GART when VRAM is exhausted). */
      nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, buf->domain,
                            old_bo, old_offset, NOUVEAU_BO_GART, size);

      /* the old storage may only be reused once the copy has executed */
      if (old_mm)
         release_allocation(&old_mm, screen->fence.current);
      nouveau_bo_ref(NULL, &old_bo);
      return TRUE;
   }

   if (domain == NOUVEAU_BO_VRAM && buf->domain == 0) {
      /* should use a scratch buffer instead here */
      if (!nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART))
         return FALSE;
      return nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_VRAM);
   }

   /* unsupported transition (was "return -1", which is a true boolean) */
   return FALSE;
}

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * MUST NOT FLUSH THE PUSH BUFFER, we could be in the middle of a method.
+ */ +boolean +nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size) +{ +   struct nvc0_screen *screen = nvc0_screen(buf->base.screen); +   int ret; + +   assert(buf->data && !buf->domain); + +   if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART)) +      return FALSE; +   ret = nouveau_bo_map_range(buf->bo, base + buf->offset, size, +                              NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); +   if (ret) +      return FALSE; +   memcpy(buf->bo->map, buf->data + base, size); +   nouveau_bo_unmap(buf->bo); + +   return TRUE; +} diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c new file mode 100644 index 0000000000..a05408a678 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -0,0 +1,173 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" + +#include "nvc0_context.h" +#include "nvc0_screen.h" +#include "nvc0_resource.h" + +#include "nouveau/nouveau_reloc.h" + +static void +nvc0_flush(struct pipe_context *pipe, unsigned flags, +           struct pipe_fence_handle **fence) +{ +   struct nvc0_context *nvc0 = nvc0_context(pipe); +   struct nouveau_channel *chan = nvc0->screen->base.channel; + +   if (flags & PIPE_FLUSH_TEXTURE_CACHE) { +      BEGIN_RING(chan, RING_3D(SERIALIZE), 1); +      OUT_RING  (chan, 0); +      BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); +      OUT_RING  (chan, 0x00); +   } + +   if (fence) { +      nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE); +   } + +   if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { +      FIRE_RING(chan); + +      nvc0_screen_fence_next(nvc0->screen); +   } +} + +static void +nvc0_destroy(struct pipe_context *pipe) +{ +   struct nvc0_context *nvc0 = nvc0_context(pipe); + +   draw_destroy(nvc0->draw); + +   if (nvc0->screen->cur_ctx == nvc0) +      nvc0->screen->cur_ctx = NULL; + +   FREE(nvc0); +} + +struct pipe_context * +nvc0_create(struct pipe_screen *pscreen, void *priv) +{ +   struct pipe_winsys *pipe_winsys = pscreen->winsys; +   struct nvc0_screen *screen = nvc0_screen(pscreen); +   struct nvc0_context *nvc0; + +   nvc0 = CALLOC_STRUCT(nvc0_context); +   if (!nvc0) +      return NULL; +   nvc0->screen = screen; + +   nvc0->pipe.winsys = pipe_winsys; +   nvc0->pipe.screen = pscreen; +   nvc0->pipe.priv = priv; + +   nvc0->pipe.destroy = nvc0_destroy; + +   nvc0->pipe.draw_vbo = nvc0_draw_vbo; +   nvc0->pipe.clear = nvc0_clear; + +   nvc0->pipe.flush = nvc0_flush; + +   screen->base.channel->user_private = nvc0; + +   nvc0_init_surface_functions(nvc0); +   nvc0_init_state_functions(nvc0); +   nvc0_init_resource_functions(&nvc0->pipe); + +   nvc0->draw = draw_create(&nvc0->pipe); +   assert(nvc0->draw); +   draw_set_rasterize_stage(nvc0->draw, 
nvc0_draw_render_stage(nvc0)); + +   return &nvc0->pipe; +} + +struct resident { +   struct nouveau_bo *bo; +   uint32_t flags; +}; + +void +nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx, +                         struct nvc0_resource *resource, uint32_t flags) +{ +   struct resident rsd = { NULL, flags }; + +   if (!resource->bo) +      return; +   nouveau_bo_ref(resource->bo, &rsd.bo); + +   util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd); +} + +void +nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx, +                         struct nvc0_resource *resource) +{ +   struct resident *rsd, rem; +   unsigned i; + +   for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) { +      rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i); + +      if (rsd->bo == resource->bo) { +         rem = util_dynarray_pop(&nvc0->residents[ctx], struct resident); +         nouveau_bo_ref(NULL, &rem.bo); +         break; +      } +   } +} + +void +nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) +{ +   unsigned i; + +   for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) +      nouveau_bo_ref(NULL, &util_dynarray_element(&nvc0->residents[ctx], +                                                  struct resident, i)->bo); +   util_dynarray_resize(&nvc0->residents[ctx], 0); +} + +void +nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) +{ +   struct resident *rsd; +   struct util_dynarray *array; +   unsigned ctx, i; + +   for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { +      array = &nvc0->residents[ctx]; + +      for (i = 0; i < array->size / sizeof(struct resident); ++i) { +         rsd = util_dynarray_element(array, struct resident, i); + +         nvc0_make_bo_resident(nvc0, rsd->bo, rsd->flags); +      } +   } + +   nvc0_make_bo_resident(nvc0, nvc0->screen->text, NOUVEAU_BO_RD); +   nvc0_make_bo_resident(nvc0, nvc0->screen->uniforms, NOUVEAU_BO_RD); +   
nvc0_make_bo_resident(nvc0, nvc0->screen->txc, NOUVEAU_BO_RD); +} diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h new file mode 100644 index 0000000000..0ea18d74ee --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -0,0 +1,229 @@ +#ifndef __NVC0_CONTEXT_H__ +#define __NVC0_CONTEXT_H__ + +#include <stdio.h> +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" +#include "util/u_dynarray.h" + +#include "draw/draw_vertex.h" + +#include "nvc0_winsys.h" +#include "nvc0_stateobj.h" +#include "nvc0_screen.h" +#include "nvc0_program.h" +#include "nvc0_resource.h" + +#include "nvc0_3ddefs.xml.h" +#include "nvc0_3d.xml.h" +#include "nvc0_2d.xml.h" +#include "nvc0_m2mf.xml.h" + +#define NOUVEAU_ERR(fmt, args...) \ +   fprintf(stderr, "%s:%d -  "fmt, __FUNCTION__, __LINE__, ##args); + +#ifdef NOUVEAU_DEBUG +# define NOUVEAU_DBG(args...) printf(args); +#else +# define NOUVEAU_DBG(args...) 
+#endif + +#define NVC0_NEW_BLEND        (1 << 0) +#define NVC0_NEW_RASTERIZER   (1 << 1) +#define NVC0_NEW_ZSA          (1 << 2) +#define NVC0_NEW_VERTPROG     (1 << 3) +#define NVC0_NEW_TCTLPROG     (1 << 4) +#define NVC0_NEW_TEVLPROG     (1 << 5) +#define NVC0_NEW_GMTYPROG     (1 << 6) +#define NVC0_NEW_FRAGPROG     (1 << 7) +#define NVC0_NEW_BLEND_COLOUR (1 << 8) +#define NVC0_NEW_STENCIL_REF  (1 << 9) +#define NVC0_NEW_CLIP         (1 << 10) +#define NVC0_NEW_SAMPLE_MASK  (1 << 11) +#define NVC0_NEW_FRAMEBUFFER  (1 << 12) +#define NVC0_NEW_STIPPLE      (1 << 13) +#define NVC0_NEW_SCISSOR      (1 << 14) +#define NVC0_NEW_VIEWPORT     (1 << 15) +#define NVC0_NEW_ARRAYS       (1 << 16) +#define NVC0_NEW_VERTEX       (1 << 17) +#define NVC0_NEW_CONSTBUF     (1 << 18) +#define NVC0_NEW_TEXTURES     (1 << 19) +#define NVC0_NEW_SAMPLERS     (1 << 20) + +#define NVC0_BUFCTX_CONSTANT 0 +#define NVC0_BUFCTX_FRAME    1 +#define NVC0_BUFCTX_VERTEX   2 +#define NVC0_BUFCTX_TEXTURES 3 +#define NVC0_BUFCTX_COUNT    4 + +struct nvc0_context { +   struct pipe_context pipe; + +   struct nvc0_screen *screen; + +   struct util_dynarray residents[NVC0_BUFCTX_COUNT]; + +   uint32_t dirty; + +   struct { +      uint32_t instance_bits; +      uint32_t instance_base; +      int32_t index_bias; +      boolean prim_restart; +      uint8_t num_vtxbufs; +      uint8_t num_vtxelts; +      uint8_t num_textures[5]; +      uint8_t num_samplers[5]; +      uint16_t scissor; +      uint32_t uniform_buffer_bound[5]; +   } state; + +   struct nvc0_blend_stateobj *blend; +   struct nvc0_rasterizer_stateobj *rast; +   struct nvc0_zsa_stateobj *zsa; +   struct nvc0_vertex_stateobj *vertex; + +   struct nvc0_program *vertprog; +   struct nvc0_program *tctlprog; +   struct nvc0_program *tevlprog; +   struct nvc0_program *gmtyprog; +   struct nvc0_program *fragprog; + +   struct pipe_resource *constbuf[5][16]; +   uint16_t constbuf_dirty[5]; + +   struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; +   
unsigned num_vtxbufs; +   struct pipe_index_buffer idxbuf; +   uint32_t vbo_fifo; + +   struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS]; +   unsigned num_textures[5]; +   struct nvc0_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS]; +   unsigned num_samplers[5]; + +   struct pipe_framebuffer_state framebuffer; +   struct pipe_blend_color blend_colour; +   struct pipe_stencil_ref stencil_ref; +   struct pipe_poly_stipple stipple; +   struct pipe_scissor_state scissor; +   struct pipe_viewport_state viewport; +   struct pipe_clip_state clip; + +   unsigned sample_mask; + +   boolean vbo_dirty; +   boolean vbo_push_hint; + +   struct draw_context *draw; +}; + +static INLINE struct nvc0_context * +nvc0_context(struct pipe_context *pipe) +{ +   return (struct nvc0_context *)pipe; +} + +struct nvc0_surface { +   struct pipe_surface pipe; +}; + +static INLINE struct nvc0_surface * +nvc0_surface(struct pipe_surface *ps) +{ +   return (struct nvc0_surface *)ps; +} + +static INLINE void +nvc0_make_bo_resident(struct nvc0_context *nvc0, struct nouveau_bo *bo, +                      unsigned flags) +{ +   nouveau_reloc_emit(nvc0->screen->base.channel, +                      NULL, 0, NULL, bo, 0, 0, flags, 0, 0); +} + +static INLINE void +nvc0_make_buffer_resident(struct nvc0_context *nvc0, +                          struct nvc0_resource *res, unsigned flags) +{ +   nvc0_resource_validate(res, flags); +   nvc0_make_bo_resident(nvc0, res->bo, flags); +} + +/* nvc0_context.c */ +struct pipe_context *nvc0_create(struct pipe_screen *, void *); + +void nvc0_bufctx_emit_relocs(struct nvc0_context *); +void nvc0_bufctx_reset(struct nvc0_context *, int ctx); +void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, +                              struct nvc0_resource *, uint32_t flags); +void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx, +                              struct nvc0_resource *); + +/* nvc0_draw.c */ +extern struct draw_stage *nvc0_draw_render_stage(struct 
nvc0_context *); + +/* nvc0_program.c */ +boolean nvc0_program_translate(struct nvc0_program *); +void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); + +/* nvc0_shader_state.c */ +void nvc0_vertprog_validate(struct nvc0_context *); +void nvc0_tctlprog_validate(struct nvc0_context *); +void nvc0_tevlprog_validate(struct nvc0_context *); +void nvc0_gmtyprog_validate(struct nvc0_context *); +void nvc0_fragprog_validate(struct nvc0_context *); + +/* nvc0_state.c */ +extern void nvc0_init_state_functions(struct nvc0_context *); + +/* nvc0_state_validate.c */ +extern boolean nvc0_state_validate(struct nvc0_context *); + +/* nvc0_surface.c */ +extern void nvc0_clear(struct pipe_context *, unsigned buffers, +                       const float *rgba, double depth, unsigned stencil); +extern void nvc0_init_surface_functions(struct nvc0_context *); + +/* nvc0_tex.c */ +void nvc0_validate_textures(struct nvc0_context *); +void nvc0_validate_samplers(struct nvc0_context *); + +struct pipe_sampler_view * +nvc0_create_sampler_view(struct pipe_context *, +                         struct pipe_resource *, +                         const struct pipe_sampler_view *); + +/* nvc0_transfer.c */ +void +nvc0_m2mf_push_linear(struct nvc0_context *nvc0, +		      struct nouveau_bo *dst, unsigned domain, int offset, +		      unsigned size, void *data); +void +nvc0_m2mf_copy_linear(struct nvc0_context *nvc0, +		      struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, +		      struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, +		      unsigned size); + +/* nvc0_vbo.c */ +void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); + +void * +nvc0_vertex_state_create(struct pipe_context *pipe, +                         unsigned num_elements, +                         const struct pipe_vertex_element *elements); +void +nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso); + +void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0); + 
+/* nvc0_push.c */ +void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); +void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c new file mode 100644 index 0000000000..ac7e9f66a1 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_draw.c @@ -0,0 +1,88 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "draw/draw_pipe.h" + +#include "nvc0_context.h" + +struct nvc0_render_stage { +   struct draw_stage stage; +   struct nvc0_context *nvc0; +}; + +static INLINE struct nvc0_render_stage * +nvc0_render_stage(struct draw_stage *stage) +{ +   return (struct nvc0_render_stage *)stage; +} + +static void +nvc0_render_point(struct draw_stage *stage, struct prim_header *prim) +{ +   NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_line(struct draw_stage *stage, struct prim_header *prim) +{ +   NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ +   NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nvc0_render_reset_stipple_counter(struct draw_stage *stage) +{ +   NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_destroy(struct draw_stage *stage) +{ +   FREE(stage); +} + +struct draw_stage * +nvc0_draw_render_stage(struct nvc0_context *nvc0) +{ +   struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage); + +   rs->nvc0 = nvc0; +   rs->stage.draw = nvc0->draw; +   rs->stage.destroy = nvc0_render_destroy; +   rs->stage.point = nvc0_render_point; +   rs->stage.line = nvc0_render_line; +   rs->stage.tri = nvc0_render_tri; +   rs->stage.flush = nvc0_render_flush; +   rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter; + +   return &rs->stage; +} diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c new file mode 100644 index 0000000000..dc2abe45bd --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -0,0 +1,180 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * 
and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_fence.h" +#include "nvc0_context.h" +#include "nvc0_screen.h" + +#ifdef PIPE_OS_UNIX +#include <sched.h> +#endif + +boolean +nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence, +                      boolean emit) +{ +   *fence = CALLOC_STRUCT(nvc0_fence); +   if (!*fence) +      return FALSE; + +   (*fence)->screen = screen; +   (*fence)->ref = 1; + +   if (emit) +      nvc0_fence_emit(*fence); + +   return TRUE; +} + +void +nvc0_fence_emit(struct nvc0_fence *fence) +{ +   struct nvc0_screen *screen = fence->screen; +   struct nouveau_channel *chan = screen->base.channel; + +   fence->sequence = ++screen->fence.sequence; + +   assert(fence->state == NVC0_FENCE_STATE_AVAILABLE); + +   BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); +   OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); +   OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); +   OUT_RING  (chan, fence->sequence); +   OUT_RING  (chan, NVC0_3D_QUERY_GET_FENCE); + +   ++fence->ref; + +   if (screen->fence.tail) +      screen->fence.tail->next = fence; +   else +      screen->fence.head = fence; + +   screen->fence.tail = fence; + +   fence->state = NVC0_FENCE_STATE_EMITTED; +} + +void +nvc0_fence_del(struct 
nvc0_fence *fence) +{ +   struct nvc0_fence *it; +   struct nvc0_screen *screen = fence->screen; + +   if (fence->state == NVC0_FENCE_STATE_EMITTED) { +      if (fence == screen->fence.head) { +         screen->fence.head = fence->next; +         if (!screen->fence.head) +            screen->fence.tail = NULL; +      } else { +         for (it = screen->fence.head; it && it->next != fence; it = it->next); +         it->next = fence->next; +         if (screen->fence.tail == fence) +            screen->fence.tail = it; +      } +   } +   FREE(fence); +} + +static void +nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence) +{ +   struct nvc0_mm_allocation *alloc = fence->buffers; + +   while (alloc) { +      struct nvc0_mm_allocation *next = alloc->next; +      nvc0_mm_free(alloc); +      alloc = next; +   }; +} + +static void +nvc0_screen_fence_update(struct nvc0_screen *screen) +{ +   struct nvc0_fence *fence; +   struct nvc0_fence *next = NULL; +   uint32_t sequence = screen->fence.map[0]; + +   if (screen->fence.sequence_ack == sequence) +      return; +   screen->fence.sequence_ack = sequence; + +   for (fence = screen->fence.head; fence; fence = next) { +      next = fence->next; +      sequence = fence->sequence; + +      fence->state = NVC0_FENCE_STATE_SIGNALLED; + +      if (fence->buffers) +         nvc0_fence_trigger_release_buffers(fence); + +      nvc0_fence_reference(&fence, NULL); + +      if (sequence == screen->fence.sequence_ack) +         break; +   } +   screen->fence.head = next; +   if (!next) +      screen->fence.tail = NULL; +} + +#define NVC0_FENCE_MAX_SPINS (1 << 17) + +boolean +nvc0_fence_wait(struct nvc0_fence *fence) +{ +   struct nvc0_screen *screen = fence->screen; +   int spins = 0; + +   if (fence->state == NVC0_FENCE_STATE_AVAILABLE) { +      nvc0_fence_emit(fence); + +      FIRE_RING(screen->base.channel); + +      if (fence == screen->fence.current) +         nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +   
} + +   do { +      nvc0_screen_fence_update(screen); + +      if (fence->state == NVC0_FENCE_STATE_SIGNALLED) +         return TRUE; +      spins++; +#ifdef PIPE_OS_UNIX +      if (!(spins % 8)) /* donate a few cycles */ +         sched_yield(); +#endif +   } while (spins < NVC0_FENCE_MAX_SPINS); + +   if (spins > 9000) +      NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence); + +   return FALSE; +} + +void +nvc0_screen_fence_next(struct nvc0_screen *screen) +{ +   nvc0_fence_emit(screen->fence.current); +   nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +   nvc0_screen_fence_update(screen); +} diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h new file mode 100644 index 0000000000..7b31f28808 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -0,0 +1,47 @@ + +#ifndef __NVC0_FENCE_H__ +#define __NVC0_FENCE_H__ + +#include "util/u_inlines.h" +#include "util/u_double_list.h" + +#define NVC0_FENCE_STATE_AVAILABLE 0 +#define NVC0_FENCE_STATE_EMITTED   1 +#define NVC0_FENCE_STATE_SIGNALLED 2 + +struct nvc0_mm_allocation; + +struct nvc0_fence { +   struct nvc0_fence *next; +   struct nvc0_screen *screen; +   int state; +   int ref; +   uint32_t sequence; +   struct nvc0_mm_allocation *buffers; +}; + +void nvc0_fence_emit(struct nvc0_fence *); +void nvc0_fence_del(struct nvc0_fence *); + +boolean nvc0_fence_wait(struct nvc0_fence *); + +static INLINE void +nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence) +{ +   if (*ref) { +      if (--(*ref)->ref == 0) +         nvc0_fence_del(*ref); +   } +   if (fence) +      ++fence->ref; + +   *ref = fence; +} + +static INLINE struct nvc0_fence * +nvc0_fence(struct pipe_fence_handle *fence) +{ +   return (struct nvc0_fence *)fence; +} + +#endif // __NVC0_FENCE_H__ diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c new file mode 100644 index 0000000000..5d02357381 --- /dev/null +++ 
b/src/gallium/drivers/nvc0/nvc0_formats.c @@ -0,0 +1,462 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nvc0_screen.h" +#include "nv50_texture.xml.h" +#include "nvc0_3d.xml.h" +#include "nv50_defs.xml.h" +#include "nv50_texture.xml.h" +#include "pipe/p_defines.h" + +#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r)     \ +   (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) |    \ +   (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) |  \ +   (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) |    \ +   (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) |  \ +   (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) |    \ +   (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) |  \ +   (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) |    \ +   (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) |  \ +   NV50_TIC_0_FMT_##sz,                               \ +   NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz |           \ +   NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 |           \ +   (r << 31) + +#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r)     \ +   (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) |    \ +   (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) |  \ +   (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) |    \ +   (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) |  \ +   (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) |    \ +   (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) |  \ +   (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) |    \ +   (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) |  \ +   NV50_TIC_0_FMT_##sz, 0 + +#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER +#define SAMPLER_VIEW  PIPE_BIND_SAMPLER_VIEW +#define RENDER_TARGET PIPE_BIND_RENDER_TARGET +#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL +#define SCANOUT       PIPE_BIND_SCANOUT + +/* for vertex buffers: */ +#define NV50_TIC_0_FMT_8_8_8    NV50_TIC_0_FMT_8_8_8_8 +#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 +#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32 + +const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +{ +   /* COMMON FORMATS */ + +   [PIPE_FORMAT_B8G8R8A8_UNORM] = { 
NV50_SURFACE_FORMAT_A8R8G8B8_UNORM, +    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + +   [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM, +    A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + +   [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB, +    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB, +    A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM, +    B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1), +    SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + +   [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM, +    B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), +    SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + +   [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, +    B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, +    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), +    SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, + +   [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM, +    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1), +    SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, + +   /* DEPTH/STENCIL FORMATS */ + +   [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM, +    B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_ZETA, 0), +    SAMPLER_VIEW | DEPTH_STENCIL }, + +   [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM, +    B_(C0, C0, C0, ONE, UNORM, UINT, 
UINT, UINT, 8_24, 0), +    SAMPLER_VIEW | DEPTH_STENCIL }, + +   [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM, +    B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0), +    SAMPLER_VIEW | DEPTH_STENCIL }, + +   [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8Z24_UNORM, +    B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0), +    SAMPLER_VIEW | DEPTH_STENCIL }, + +   [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, +    B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_ZETA, 0), +    SAMPLER_VIEW | DEPTH_STENCIL }, + +   [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { +    NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM, +    B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0), +    SAMPLER_VIEW | DEPTH_STENCIL }, + +   /* LUMINANCE, ALPHA, INTENSITY */ + +   [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, +    A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM, +    A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, +    A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM, +    A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), +    SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_L8A8_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, +    A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_L8A8_SRGB] = { 0, +    A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), +    SAMPLER_VIEW }, + +   /* DXT, RGTC */ + +   [PIPE_FORMAT_DXT1_RGB] = { 0, +    B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_DXT1_RGBA] = { 0, +    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_DXT3_RGBA] = { 0, +    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 
DXT3, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_DXT5_RGBA] = { 0, +    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_RGTC1_UNORM] = { 0, +    B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_RGTC1_SNORM] = { 0, +    B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_RGTC2_UNORM] = { 0, +    B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0), +    SAMPLER_VIEW }, + +   [PIPE_FORMAT_RGTC2_SNORM] = { 0, +    B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0), +    SAMPLER_VIEW }, + +   /* FLOAT 16 */ + +   [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT, +    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT, +    A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT, +    A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, +    A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   /* FLOAT 32 */ + +   [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT, +    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT, +    A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT, +    A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, 
FLOAT, FLOAT, 32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, +    A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   /* ODD FORMATS */ + +   [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT, +    B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0), +    SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0, +    B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0), +    SAMPLER_VIEW }, + +   /* SNORM 32 */ + +   [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0, +    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32G32B32_SNORM] = { 0, +    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32G32_SNORM] = { 0, +    A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32_SNORM] = { 0, +    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   /* UNORM 32 */ + +   [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0, +    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32G32B32_UNORM] = { 0, +    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32G32_UNORM] = { 0, +    A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32_UNORM] = { 0, +    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   /* SNORM 16 */ + +   [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM, +    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0), +    VERTEX_BUFFER | 
SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R16G16B16_SNORM] = { 0, +    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, +    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, +    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   /* UNORM 16 */ + +   [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM, +    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R16G16B16_UNORM] = { 0, +    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, +    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, +    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   /* SNORM 8 */ + +   [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM, +    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R8G8B8_SNORM] = { 0, +    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM, +    A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM, +    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | 
RENDER_TARGET }, + +   /* UNORM 8 */ + +   [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM, +    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB, +    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), +    SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM, +    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB, +    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), +    SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM, +    A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, +    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + +   /* SSCALED 32 */ + +   [PIPE_FORMAT_R32G32B32A32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_SINT, +    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32G32B32_SSCALED] = { 0, +    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32G32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32_SINT, +    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32_SSCALED] = { 0, +    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   /* USCALED 32 */ + +   [PIPE_FORMAT_R32G32B32A32_USCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_UINT, +    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, 
USCALED, 32_32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32G32B32_USCALED] = { 0, +    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32G32_USCALED] = { NV50_SURFACE_FORMAT_R32G32_UINT, +    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R32_USCALED] = { 0, +    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   /* SSCALED 16 */ + +   [PIPE_FORMAT_R16G16B16A16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_SINT, +    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R16G16B16_SSCALED] = { 0, +    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R16G16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16_SINT, +    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R16_SSCALED] = { NV50_SURFACE_FORMAT_R16_SINT, +    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   /* USCALED 16 */ + +   [PIPE_FORMAT_R16G16B16A16_USCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_UINT, +    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R16G16B16_USCALED] = { 0, +    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R16G16_USCALED] = { NV50_SURFACE_FORMAT_R16G16_UINT, +    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R16_USCALED] = { NV50_SURFACE_FORMAT_R16_UINT, +    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0), +    
VERTEX_BUFFER | SAMPLER_VIEW }, + +   /* SSCALED 8 */ + +   [PIPE_FORMAT_R8G8B8A8_SSCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_SINT, +    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R8G8B8_SSCALED] = { 0, +    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R8G8_SSCALED] = { NV50_SURFACE_FORMAT_R8G8_SINT, +    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R8_SSCALED] = { NV50_SURFACE_FORMAT_R8_SINT, +    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   /* USCALED 8 */ + +   [PIPE_FORMAT_R8G8B8A8_USCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_UINT, +    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R8G8B8_USCALED] = { 0, +    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R8G8_USCALED] = { NV50_SURFACE_FORMAT_R8G8_UINT, +    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, + +   [PIPE_FORMAT_R8_USCALED] = { NV50_SURFACE_FORMAT_R8_UINT, +    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0), +    VERTEX_BUFFER | SAMPLER_VIEW }, +}; diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h new file mode 100644 index 0000000000..7db09287ab --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h @@ -0,0 +1,220 @@ + +#ifndef __NVC0_PGRAPH_MACROS_H__ +#define __NVC0_PGRAPH_MACROS_H__ + +static const uint32_t nvc0_9097_blend_enables[] = +{ +   0x05360021, /* maddr [0x1360, increment = 0x4] */ +   0x00404042, /* sendbf $r1 0 0x1 */ +   0x00424042, /* sendbf $r1 1 0x1 */ +   0x00444042, /* sendbf $r1 2 0x1 */ +   
/*
 * Macro code for the 0x9097 object class (going by the array-name prefix).
 *
 * Per the per-word annotations below: the first word points the method
 * address at 0x1a00 with an increment of 4, after which $r1 is sent eight
 * times, so the same value is presumably written to the eight consecutive
 * methods 0x1a00..0x1a1c.
 *
 * NOTE(review): the name suggests this broadcasts one colour mask to all
 * render targets - confirm that 0x1a00 is the first colour-mask method.
 * NOTE(review): the last send follows "exit send"; it presumably executes
 * in the exit delay slot (nvc0_9097_blend_enables annotates its final word
 * as a delay slot) - confirm against the macro ISA.
 */
static const uint32_t nvc0_9097_color_mask_brdc[] =
{
   0x05a00021, /* maddr [0x1a00, increment = 4] */
   0x00000841, /* send $r1 */
   0x00000841, /* send $r1 */
   0x00000841, /* send $r1 */
   0x00000841, /* send $r1 */
   0x00000841, /* send $r1 */
   0x00000841, /* send $r1 */
   0x000008c1, /* exit send $r1 */
   0x00000841, /* send $r1 */
};
/*
 * Macro code for the 0x9097 object class (going by the array-name prefix).
 *
 * Word-for-word identical to nvc0_9097_poly_mode_front except for words 1
 * and 4, which swap the two register addresses involved:
 *
 *    here:   word 1 = 0x00dac515, word 4 = 0x00db0021
 *    front:  word 1 = 0x00db0515, word 4 = 0x00dac021
 *
 * nvc0_9097_gp_select annotates the word 0x00dac515 as
 * "read $r5 [NVC0_3D_POLYGON_MODE_FRONT]".
 * NOTE(review): by analogy with the maddr annotations elsewhere in this
 * file, 0x00db0021 presumably is "maddr [0x0db0]" (POLYGON_MODE_BACK), i.e.
 * this macro reads the front mode and writes the back mode - confirm
 * against the macro ISA.
 */
static const uint32_t nvc0_9097_poly_mode_back[] =
{
   0x03410615,
   0x00dac515, /* differs from poly_mode_front (0x00db0515) */
   0x22018312,
   0x02100415,
   0x00db0021, /* differs from poly_mode_front (0x00dac021) */
   0x00000841,
   0x06c04211,
   0x0004aa10,
   0x00009037,
   0x22118312,
   0x020c0515,
   0x06c04211,
   0x00048a10,
   0x00009037,
   0x22118312,
   0x007f8612,
   0x0000f037,
   0x01a24021,
   0x1cd4c041,
   0x00104211,
   0x0004a210,
   0x00009037,
   0x00000311,
   0x000c4211,
   0x0004aa10,
   0x00009037,
   0x00000311,
   0x002ec0a1,
   0x00001841
};
/*
 * Macro code for the 0x9097 object class (going by the array-name prefix).
 * Each word is annotated with its index within the macro and its
 * disassembly.
 *
 * NOTE(review): the trailing 0x10 on the declaration is presumably the
 * macro's slot/index (nvc0_9097_gp_select carries 0x0f) - confirm against
 * the code that uploads these macros.
 * NOTE(review): the annotation at 0x23 ("branz $r2 add $r2") looks
 * incomplete - verify against a disassembler before relying on it.
 */
static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
{
   0x03410615, /* 0x00: read $r6 [0x3410] */
   0x00dac515, /* 0x01: read $r5 [NVC0_3D_POLYGON_MODE_FRONT] */
   0x22018312, /* 0x02: mov $r3 extrinsrt 0 $r6 0 0x8 0x4 */
   0x00db0415, /* 0x03: read $r4 [NVC0_3D_POLYGON_MODE_BACK] */
   0x020c0021, /* 0x04: maddr [NVC0_3D_SP_SELECT(3), increment = 0] */
   0x00000841, /* 0x05: send $r1 */
   0x06c04211, /* 0x06: mov $r2 GL_POLYGON_MODE_LINE */
   0x0004aa10, /* 0x07: mov $r2 sub $r5 $r2 */
   0x00009037, /* 0x08: branz annul $r2 0xa */
   0x22118312, /* 0x09: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
   0x02100515, /* 0x0a: read $r5 [NVC0_3D_SP_SELECT(4)] */
   0x06c04211, /* 0x0b: mov $r2 GL_POLYGON_MODE_LINE */
   0x0004a210, /* 0x0c: mov $r2 sub $r4 $r2 */
   0x00009037, /* 0x0d: branz annul $r2 0xf */
   0x22118312, /* 0x0e: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
   0x007f8612, /* 0x0f: mov $r6 extrinsrt 0 $r6 0x1f 0x1 0 */
   0x0000f037, /* 0x10: branz annul $r6 0x13 (target as annotated for the
                *       identical word at 0x10 in nvc0_9097_gp_select) */
   0x01a24021, /* 0x11: maddr [0x1a24] */
   0x1cd4c041, /* 0x12: send 0x7353 */
   0x00104211, /* 0x13: mov $r2 0x41 */
   0x0004aa10, /* 0x14: mov $r2 sub $r5 $r2 */
   0x00009037, /* 0x15: branz annul $r2 0x17 */
   0x00000311, /* 0x16: mov $r3 0 */
   0x000c4211, /* 0x17: mov $r2 0x31 */
   0x00048a10, /* 0x18: mov $r2 sub $r1 $r2 */
   0x00035037, /* 0x19: branz annul $r2 0x26 */
   0x00000311, /* 0x1a: mov $r3 0 */
   0x03430415, /* 0x1b: read $r4 [0x3430] */
   0x00450512, /* 0x1c: mov $r5 extrinsrt 0 $r4 0x2 0x1 0 */
   0x00004211, /* 0x1d: mov $r2 0x1 */
   0x0004aa10, /* 0x1e: mov $r2 sub $r5 $r2 */
   0x00019037, /* 0x1f: branz annul $r2 0x25 */
   0x0003c211, /* 0x20: mov $r2 0xf */
   0x01a2c021, /* 0x21: maddr [0x1a2c] */
   0xffffd211, /* 0x22: mov $r2 add $r2 -0x1 */
   0xffff9017, /* 0x23: branz $r2 add $r2 */
   0x00000041, /* 0x24: send 0 */
   0x00000011, /* 0x25: nop */
   0x002ec0a1, /* 0x26: exit maddr [0x02ec] */
   0x00001841  /* 0x27: send $r3 */
};
<koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons 
to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_M2MF_TILING_MODE_IN				0x00000204 + +#define NVC0_M2MF_TILING_PITCH_IN				0x00000208 + +#define NVC0_M2MF_TILING_HEIGHT_IN				0x0000020c + +#define NVC0_M2MF_TILING_DEPTH_IN				0x00000210 + +#define NVC0_M2MF_TILING_POSITION_IN_Z				0x00000214 + +#define NVC0_M2MF_TILING_MODE_OUT				0x00000220 + +#define NVC0_M2MF_TILING_PITCH_OUT				0x00000224 + +#define NVC0_M2MF_TILING_HEIGHT_OUT				0x00000228 + +#define NVC0_M2MF_TILING_DEPTH_OUT				0x0000022c + +#define NVC0_M2MF_TILING_POSITION_OUT_Z				0x00000230 + +#define NVC0_M2MF_OFFSET_OUT_HIGH				0x00000238 + +#define NVC0_M2MF_OFFSET_OUT_LOW				0x0000023c + +#define NVC0_M2MF_EXEC						0x00000300 +#define NVC0_M2MF_EXEC_PUSH					0x00000001 +#define NVC0_M2MF_EXEC_LINEAR_IN				0x00000010 +#define NVC0_M2MF_EXEC_LINEAR_OUT				0x00000100 +#define NVC0_M2MF_EXEC_NOTIFY					0x00002000 +#define NVC0_M2MF_EXEC_INC__MASK				0x00f00000 +#define NVC0_M2MF_EXEC_INC__SHIFT				20 + +#define NVC0_M2MF_DATA						0x00000304 + +#define NVC0_M2MF_OFFSET_IN_HIGH				0x0000030c + +#define NVC0_M2MF_OFFSET_IN_LOW					0x00000310 + +#define NVC0_M2MF_PITCH_IN					0x00000314 + +#define NVC0_M2MF_PITCH_OUT					0x00000318 + +#define NVC0_M2MF_LINE_LENGTH_IN				0x0000031c + +#define NVC0_M2MF_LINE_COUNT					
0x00000320 + +#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH				0x0000032c + +#define NVC0_M2MF_NOTIFY_ADDRESS_LOW				0x00000330 + +#define NVC0_M2MF_NOTIFY					0x00000334 + +#define NVC0_M2MF_TILING_POSITION_IN_X				0x00000344 + +#define NVC0_M2MF_TILING_POSITION_IN_Y				0x00000348 + +#define NVC0_M2MF_TILING_POSITION_OUT_X				0x0000034c + +#define NVC0_M2MF_TILING_POSITION_OUT_Y				0x00000350 + + +#endif /* NVC0_M2MF_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c new file mode 100644 index 0000000000..cca307b37f --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -0,0 +1,327 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" +#include "nvc0_transfer.h" + +static INLINE uint32_t +get_tile_dims(unsigned nx, unsigned ny, unsigned nz) +{ +   uint32_t tile_mode = 0x000; + +   if (ny > 64) tile_mode = 0x040; /* height 128 tiles */ +   else +   if (ny > 32) tile_mode = 0x030; /* height 64 tiles */ +   else +   if (ny > 16) tile_mode = 0x020; /* height 32 tiles */ +   else +   if (ny >  8) tile_mode = 0x010; /* height 16 tiles */ + +   if (nz == 1) +      return tile_mode; +   else +   if (tile_mode > 0x020) +      tile_mode = 0x020; + +   if (nz > 16 && tile_mode < 0x020) +      return tile_mode | 0x500; /* depth 32 tiles */ +   if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */ +   if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */ +   if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */ + +   return tile_mode | 0x100; +} + +static INLINE unsigned +get_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh) +{ +   unsigned tile_h = NVC0_TILE_H(tile_mode); +   unsigned tile_d = NVC0_TILE_D(tile_mode); + +   /* pitch_2d == to next slice within this volume tile */ +   /* pitch_3d == size (in bytes) of a volume tile */ +   unsigned pitch_2d = tile_h * 64; +   unsigned pitch_3d = tile_d * align(nbh, tile_h) * pitch; + +   return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d; +} + +static void +nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) +{ +   struct nvc0_miptree *mt = nvc0_miptree(pt); +   unsigned l; + +   for (l = 0; l <= pt->last_level; ++l) +      FREE(mt->level[l].image_offset); + +   nouveau_screen_bo_release(pscreen, mt->base.bo); + +   FREE(mt); +} + +static boolean +nvc0_miptree_get_handle(struct pipe_screen *pscreen, +                        struct pipe_resource *pt, +                        struct winsys_handle *whandle) +{ +   struct 
nvc0_miptree *mt = nvc0_miptree(pt); +   unsigned stride; + +   if (!mt || !mt->base.bo) +      return FALSE; + +   stride = util_format_get_stride(mt->base.base.format, +                                   mt->base.base.width0); + +   return nouveau_screen_bo_get_handle(pscreen, +                                       mt->base.bo, +                                       stride, +                                       whandle); +} + +const struct u_resource_vtbl nvc0_miptree_vtbl = +{ +   nvc0_miptree_get_handle,         /* get_handle */ +   nvc0_miptree_destroy,            /* resource_destroy */ +   NULL,                            /* is_resource_referenced */ +   nvc0_miptree_transfer_new,       /* get_transfer */ +   nvc0_miptree_transfer_del,       /* transfer_destroy */ +   nvc0_miptree_transfer_map,	      /* transfer_map */ +   u_default_transfer_flush_region, /* transfer_flush_region */ +   nvc0_miptree_transfer_unmap,     /* transfer_unmap */ +   u_default_transfer_inline_write  /* transfer_inline_write */ +}; + +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, +                    const struct pipe_resource *templ) +{ +   struct nouveau_device *dev = nouveau_screen(pscreen)->device; +   struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree); +   struct pipe_resource *pt = &mt->base.base; +   int ret, i; +   unsigned w, h, d, l, image_alignment, alloc_size; +   uint32_t tile_flags; + +   if (!mt) +      return NULL; + +   mt->base.vtbl = &nvc0_miptree_vtbl; +   *pt = *templ; +   pipe_reference_init(&pt->reference, 1); +   pt->screen = pscreen; + +   w = pt->width0; +   h = pt->height0; +   d = pt->depth0; + +   switch (pt->format) { +   case PIPE_FORMAT_Z16_UNORM: +      tile_flags = 0x0700; /* COMPRESSED */ +      tile_flags = 0x0200; /* NORMAL ? */ +      tile_flags = 0x0100; /* NORMAL ? 
*/ +      break; +   case PIPE_FORMAT_S8_USCALED_Z24_UNORM: +      tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ +      tile_flags = 0x4600; /* NORMAL */ +      break; +   case PIPE_FORMAT_Z24X8_UNORM: +   case PIPE_FORMAT_Z24_UNORM_S8_USCALED: +      tile_flags = 0x1100; /* NORMAL */ +      if (w * h >= 128 * 128 && 0) +         tile_flags = 0x1700; /* COMPRESSED, requires magic */ +      break; +   case PIPE_FORMAT_R32G32B32A32_FLOAT: +      tile_flags = 0xf500; /* COMPRESSED */ +      tile_flags = 0xf700; /* MSAA 2 */ +      tile_flags = 0xf900; /* MSAA 4 */ +      tile_flags = 0xfe00; /* NORMAL */ +      break; +   case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: +      tile_flags = 0xce00; /* COMPRESSED */ +      tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */ +      tile_flags = 0xd000; /* MSAA 4, COMPRESSED */ +      tile_flags = 0xc300; /* NORMAL */ +      break; +   case PIPE_FORMAT_R16G16B16A16_UNORM: +      tile_flags = 0xe900; /* COMPRESSED */ +      break; +   default: +      tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ +      tile_flags = 0xfe00; /* NORMAL 32 BIT */ +      if (w * h >= 128 * 128 && 0) +         tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */ +      break; +   } + +   /* XXX: texture arrays */ +   mt->image_nr = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1; + +   for (l = 0; l <= pt->last_level; l++) { +      struct nvc0_miptree_level *lvl = &mt->level[l]; +      unsigned nby = util_format_get_nblocksy(pt->format, h); + +      lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); +      lvl->pitch = align(util_format_get_stride(pt->format, w), 64); +      lvl->tile_mode = get_tile_dims(w, nby, d); + +      w = u_minify(w, 1); +      h = u_minify(h, 1); +      d = u_minify(d, 1); +   } + +   image_alignment  = NVC0_TILE_H(mt->level[0].tile_mode) * 64; +   image_alignment *= NVC0_TILE_D(mt->level[0].tile_mode); + +   /* NOTE the distinction between arrays of mip-mapped 2D textures and +    * mip-mapped 3D textures. 
We can't use image_nr == depth for 3D mip. +    */ +   for (i = 0; i < mt->image_nr; i++) { +      for (l = 0; l <= pt->last_level; l++) { +         struct nvc0_miptree_level *lvl = &mt->level[l]; +         int size; +         unsigned tile_h = NVC0_TILE_H(lvl->tile_mode); +         unsigned tile_d = NVC0_TILE_D(lvl->tile_mode); + +         h = u_minify(pt->height0, l); +         d = u_minify(pt->depth0, l); + +         size  = lvl->pitch; +         size *= align(util_format_get_nblocksy(pt->format, h), tile_h); +         size *= align(d, tile_d); + +         lvl->image_offset[i] = mt->total_size; + +         mt->total_size += size; +      } +      mt->total_size = align(mt->total_size, image_alignment); +   } + +   alloc_size = mt->total_size; +   if (tile_flags == 0x1700) +      alloc_size *= 3; /* HiZ, XXX: correct size */ + +   ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size, +                             mt->level[0].tile_mode, tile_flags, +                             &mt->base.bo); +   if (ret) { +      for (l = 0; l <= pt->last_level; ++l) +         FREE(mt->level[l].image_offset); +      FREE(mt); +      return NULL; +   } + +   return pt; +} + +struct pipe_resource * +nvc0_miptree_from_handle(struct pipe_screen *pscreen, +                         const struct pipe_resource *templ, +                         struct winsys_handle *whandle) +{ +   struct nvc0_miptree *mt; +   unsigned stride; + +	/* only supports 2D, non-mip mapped textures for the moment */ +   if ((templ->target != PIPE_TEXTURE_2D && +        templ->target != PIPE_TEXTURE_RECT) || +       templ->last_level != 0 || +       templ->depth0 != 1) +      return NULL; + +   mt = CALLOC_STRUCT(nvc0_miptree); +   if (!mt) +      return NULL; + +   mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); +   if (mt->base.bo == NULL) { +      FREE(mt); +      return NULL; +   } + +   mt->base.base = *templ; +   mt->base.vtbl = &nvc0_miptree_vtbl; +   
pipe_reference_init(&mt->base.base.reference, 1); +   mt->base.base.screen = pscreen; +   mt->image_nr = 1; +   mt->level[0].pitch = stride; +   mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); +   mt->level[0].tile_mode = mt->base.bo->tile_mode; + +   /* no need to adjust bo reference count */ +   return &mt->base.base; +} + + +/* Surface functions. + */ + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, +                         unsigned face, unsigned level, unsigned zslice, +                         unsigned flags) +{ +   struct nvc0_miptree *mt = nvc0_miptree(pt); +   struct nvc0_miptree_level *lvl = &mt->level[level]; +   struct pipe_surface *ps; +   unsigned img = 0; + +   if (pt->target == PIPE_TEXTURE_CUBE) +      img = face; + +   ps = CALLOC_STRUCT(pipe_surface); +   if (!ps) +      return NULL; +   pipe_resource_reference(&ps->texture, pt); +   ps->format = pt->format; +   ps->width = u_minify(pt->width0, level); +   ps->height = u_minify(pt->height0, level); +   ps->usage = flags; +   pipe_reference_init(&ps->reference, 1); +   ps->face = face; +   ps->level = level; +   ps->zslice = zslice; +   ps->offset = lvl->image_offset[img]; + +   if (pt->target == PIPE_TEXTURE_3D) +      ps->offset += get_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, +                                      util_format_get_nblocksy(pt->format, +                                                               ps->height)); +   return ps; +} + +void +nvc0_miptree_surface_del(struct pipe_surface *ps) +{ +   struct nvc0_surface *s = nvc0_surface(ps); + +   pipe_resource_reference(&ps->texture, NULL); + +   FREE(s); +} diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c new file mode 100644 index 0000000000..e031fb393a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_mm.c @@ -0,0 +1,245 @@ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + 
+#include "nvc0_screen.h" + +#define MM_MIN_ORDER 7 +#define MM_MAX_ORDER 20 + +#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1) + +#define MM_MIN_SIZE (1 << MM_MIN_ORDER) +#define MM_MAX_SIZE (1 << MM_MAX_ORDER) + +struct mm_bucket { +   struct list_head free; +   struct list_head used; +   struct list_head full; +   int num_free; +}; + +struct nvc0_mman { +   struct nouveau_device *dev; +   struct mm_bucket bucket[MM_NUM_BUCKETS]; +   uint32_t storage_type; +   uint32_t domain; +   uint64_t allocated; +}; + +struct mm_slab { +   struct list_head head; +   struct nouveau_bo *bo; +   struct nvc0_mman *cache; +   int order; +   int count; +   int free; +   uint32_t bits[0]; +}; + +static int +mm_slab_alloc(struct mm_slab *slab) +{ +   int i, n, b; + +   if (slab->free == 0) +      return -1; + +   for (i = 0; i < (slab->count + 31) / 32; ++i) { +      b = ffs(slab->bits[i]) - 1; +      if (b >= 0) { +         n = i * 32 + b; +         assert(n < slab->count); +         slab->free--; +         slab->bits[i] &= ~(1 << b); +         return n; +      } +   } +   return -1; +} + +static INLINE void +mm_slab_free(struct mm_slab *slab, int i) +{ +   assert(i < slab->count); +   slab->bits[i / 32] |= 1 << (i % 32); +   slab->free++; +   assert(slab->free <= slab->count); +} + +static INLINE int +mm_get_order(uint32_t size) +{ +   int s = __builtin_clz(size) ^ 31; + +   if (size > (1 << s)) +      s += 1; +   return s; +} + +static struct mm_bucket * +mm_bucket_by_order(struct nvc0_mman *cache, int order) +{ +   if (order > MM_MAX_ORDER) +      return NULL; +   return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER]; +} + +static struct mm_bucket * +mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) +{ +   return mm_bucket_by_order(cache, mm_get_order(size)); +} + +/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */ +static INLINE uint32_t +mm_default_slab_size(unsigned chunk_order) +{ +   assert(chunk_order <= MM_MAX_ORDER && 
chunk_order >= MM_MIN_ORDER); + +   static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = +   { +      12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 +   }; + +   return 1 << slab_order[chunk_order - MM_MIN_ORDER]; +} + +static int +mm_slab_new(struct nvc0_mman *cache, int chunk_order) +{ +   struct mm_slab *slab; +   int words, ret; +   const uint32_t size = mm_default_slab_size(chunk_order); + +   words = ((size >> chunk_order) + 31) / 32; +   assert(words); + +   slab = MALLOC(sizeof(struct mm_slab) + words * 4); +   if (!slab) +      return PIPE_ERROR_OUT_OF_MEMORY; + +   memset(&slab->bits[0], ~0, words * 4); + +   slab->bo = NULL; +   ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, +                             0, cache->storage_type, &slab->bo); +   if (ret) { +      FREE(slab); +      return PIPE_ERROR_OUT_OF_MEMORY; +   } + +   LIST_INITHEAD(&slab->head); + +   slab->cache = cache; +   slab->order = chunk_order; +   slab->count = slab->free = size >> chunk_order; + +   LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free); + +   cache->allocated += size; + +   debug_printf("MM: new slab, total memory = %lu KiB\n", +                cache->allocated / 1024); + +   return PIPE_OK; +} + +/* @return token to identify slab or NULL if we just allocated a new bo */ +struct nvc0_mm_allocation * +nvc0_mm_allocate(struct nvc0_mman *cache, +                 uint32_t size, struct nouveau_bo **bo, uint32_t *offset) +{ +   struct mm_bucket *bucket; +   struct mm_slab *slab; +   struct nvc0_mm_allocation *alloc; +   int ret; + +   bucket = mm_bucket_by_size(cache, size); +   if (!bucket) { +      ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, +                                0, cache->storage_type, bo); +      if (ret) +         debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret); + +      *offset = 0; +      return NULL; +   } + +   if (!LIST_IS_EMPTY(&bucket->used)) { +      slab = 
LIST_ENTRY(struct mm_slab, bucket->used.next, head); +   } else { +      if (LIST_IS_EMPTY(&bucket->free)) { +         mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)); +      } +      slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head); + +      LIST_DEL(&slab->head); +      LIST_ADD(&slab->head, &bucket->used); +   } + +   *offset = mm_slab_alloc(slab) << slab->order; + +   alloc = MALLOC_STRUCT(nvc0_mm_allocation); +   if (!alloc) +      return NULL; + +   nouveau_bo_ref(slab->bo, bo); + +   if (slab->free == 0) { +      LIST_DEL(&slab->head); +      LIST_ADD(&slab->head, &bucket->full); +   } + +   alloc->next = NULL; +   alloc->offset = *offset; +   alloc->priv = (void *)slab; + +   return alloc; +} + +void +nvc0_mm_free(struct nvc0_mm_allocation *alloc) +{ +   struct mm_slab *slab = (struct mm_slab *)alloc->priv; +   struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order); + +   mm_slab_free(slab, alloc->offset >> slab->order); + +   if (slab->free == 1) { +      LIST_DEL(&slab->head); + +      if (slab->count > 1) +         LIST_ADDTAIL(&slab->head, &bucket->used); +      else +         LIST_ADDTAIL(&slab->head, &bucket->free); +   } + +   FREE(alloc); +} + +struct nvc0_mman * +nvc0_mm_create(struct nouveau_device *dev, uint32_t domain, +               uint32_t storage_type) +{ +   struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman); +   int i; + +   if (!cache) +      return NULL; + +   cache->dev = dev; +   cache->domain = domain; +   cache->storage_type = storage_type; +   cache->allocated = 0; + +   for (i = 0; i < MM_NUM_BUCKETS; ++i) { +      LIST_INITHEAD(&cache->bucket[i].free); +      LIST_INITHEAD(&cache->bucket[i].used); +      LIST_INITHEAD(&cache->bucket[i].full); +   } + +   return cache; +} + diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c new file mode 100644 index 0000000000..cf7b8e347f --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -0,0 +1,686 @@ +/* + * 
Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define NOUVEAU_DEBUG 1 + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +boolean +nvc0_insn_can_load(struct nv_instruction *nvi, int s, +                   struct nv_instruction *ld) +{ +   int i; + +   if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) { +      if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s))) +         return FALSE; +      if (!(nvc0_op_info_table[nvi->opcode].immediate & 4)) +         if (ld->src[0]->value->reg.imm.u32 & 0xfff) +            return FALSE; +   } else +   if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s))) +      return FALSE; + +   if (ld->indirect >= 0) +      return FALSE; + +   for (i = 0; i < 3 && nvi->src[i]; ++i) +      if (nvi->src[i]->value->reg.file == NV_FILE_IMM) +         return FALSE; + +   return TRUE; +} + +/* Return whether this instruction can be executed conditionally. */ +boolean +nvc0_insn_is_predicateable(struct nv_instruction *nvi) +{ +   int s; + +   if (!nv_op_predicateable(nvi->opcode)) +      return FALSE; +   if (nvi->predicate >= 0) +      return FALSE; +   for (s = 0; s < 4 && nvi->src[s]; ++s) +      if (nvi->src[s]->value->reg.file == NV_FILE_IMM) +         return FALSE; +   return TRUE; +} + +int +nvc0_insn_refcount(struct nv_instruction *nvi) +{ +   int rc = 0; +   int i; +   for (i = 0; i < 5 && nvi->def[i]; ++i) { +      if (!nvi->def[i]) +         return rc; +      rc += nvi->def[i]->refc; +   } +   return rc; +} + +int +nvc0_pc_replace_value(struct nv_pc *pc, +		      struct nv_value *old_val, +		      struct nv_value *new_val) +{ +   int i, n, s; + +   if (old_val == new_val) +      return old_val->refc; + +   for (i = 0, n = 0; i < pc->num_refs; ++i) { +      if (pc->refs[i]->value == old_val) { +         ++n; +         for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s) +            if (pc->refs[i]->insn->src[s] == pc->refs[i]) +               break; +         assert(s < 6); +         nv_reference(pc, pc->refs[i]->insn, s, 
new_val); +      } +   } +   return n; +} + +struct nv_value * +nvc0_pc_find_constant(struct nv_ref *ref) +{ +   struct nv_value *src; + +   if (!ref) +      return NULL; + +   src = ref->value; +   while (src->insn && src->insn->opcode == NV_OP_MOV) { +      assert(!src->insn->src[0]->mod); +      src = src->insn->src[0]->value; +   } +   if ((src->reg.file == NV_FILE_IMM) || +       (src->insn && +        src->insn->opcode == NV_OP_LD && +        src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && +        src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15))) +      return src; +   return NULL; +} + +struct nv_value * +nvc0_pc_find_immediate(struct nv_ref *ref) +{ +   struct nv_value *src = nvc0_pc_find_constant(ref); + +   return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; +} + +static void +nv_pc_free_refs(struct nv_pc *pc) +{ +   int i; +   for (i = 0; i < pc->num_refs; i += 64) +      FREE(pc->refs[i]); +   FREE(pc->refs); +} + +static const char * +edge_name(ubyte type) +{ +   switch (type) { +   case CFG_EDGE_FORWARD: return "forward"; +   case CFG_EDGE_BACK: return "back"; +   case CFG_EDGE_LOOP_ENTER: return "loop"; +   case CFG_EDGE_LOOP_LEAVE: return "break"; +   case CFG_EDGE_FAKE: return "fake"; +   default: +      return "?"; +   } +} + +void +nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, +                      void *priv) +{ +   struct nv_basic_block *bb[64], *bbb[16], *b; +   int j, p, pp; + +   bb[0] = root; +   p = 1; +   pp = 0; + +   while (p > 0) { +      b = bb[--p]; +      b->priv = 0; + +      for (j = 1; j >= 0; --j) { +         if (!b->out[j]) +            continue; + +         switch (b->out_kind[j]) { +         case CFG_EDGE_BACK: +            continue; +         case CFG_EDGE_FORWARD: +         case CFG_EDGE_FAKE: +            if (++b->out[j]->priv == b->out[j]->num_in) +               bb[p++] = b->out[j]; +            break; +         case CFG_EDGE_LOOP_ENTER: +            bb[p++] = b->out[j]; 
+            break; +         case CFG_EDGE_LOOP_LEAVE: +            bbb[pp++] = b->out[j]; +            break; +         default: +            assert(0); +            break; +         } +      } + +      f(priv, b); + +      if (!p) { +         p = pp; +         for (; pp > 0; --pp) +            bb[pp - 1] = bbb[pp - 1]; +      } +   } +} + +static void +nv_do_print_function(void *priv, struct nv_basic_block *b) +{ +   struct nv_instruction *i; + +   debug_printf("=== BB %i ", b->id); +   if (b->out[0]) +      debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); +   if (b->out[1]) +      debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); +   debug_printf("===\n"); + +   i = b->phi; +   if (!i) +      i = b->entry; +   for (; i; i = i->next) +      nvc0_print_instruction(i); +} + +void +nvc0_print_function(struct nv_basic_block *root) +{ +   if (root->subroutine) +      debug_printf("SUBROUTINE %i\n", root->subroutine); +   else +      debug_printf("MAIN\n"); + +   nvc0_pc_pass_in_order(root, nv_do_print_function, root); +} + +void +nvc0_print_program(struct nv_pc *pc) +{ +   int i; +   for (i = 0; i < pc->num_subroutines + 1; ++i) +      if (pc->root[i]) +         nvc0_print_function(pc->root[i]); +} + +#if NOUVEAU_DEBUG > 1 +static void +nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) +{ +   int i; + +   b->pass_seq = pc->pass_seq; + +   fprintf(f, "\t%i [shape=box]\n", b->id); + +   for (i = 0; i < 2; ++i) { +      if (!b->out[i]) +         continue; +      switch (b->out_kind[i]) { +      case CFG_EDGE_FORWARD: +         fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); +         break; +      case CFG_EDGE_LOOP_ENTER: +         fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id); +         break; +      case CFG_EDGE_LOOP_LEAVE: +         fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id); +         break; +      case CFG_EDGE_BACK: +         fprintf(f, "\t%i -> %i;\n", 
b->id, b->out[i]->id); +         continue; +      case CFG_EDGE_FAKE: +         fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id); +         break; +      default: +         assert(0); +         break; +      } +      if (b->out[i]->pass_seq < pc->pass_seq) +         nv_do_print_cfgraph(pc, f, b->out[i]); +   } +} + +/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */ +static void +nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) +{ +   FILE *f; + +   f = fopen(filepath, "a"); +   if (!f) +      return; + +   fprintf(f, "digraph G {\n"); + +   ++pc->pass_seq; + +   nv_do_print_cfgraph(pc, f, pc->root[subr]); + +   fprintf(f, "}\n"); + +   fclose(f); +} +#endif + +static INLINE void +nvc0_pc_print_binary(struct nv_pc *pc) +{ +   unsigned i; + +   NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8); + +   for (i = 0; i < pc->emit_size / 4; i += 2) { +      debug_printf("0x%08x ", pc->emit[i + 0]); +      debug_printf("0x%08x ", pc->emit[i + 1]); +      if ((i % 16) == 15) +         debug_printf("\n"); +   } +   debug_printf("\n"); +} + +static int +nvc0_emit_program(struct nv_pc *pc) +{ +   uint32_t *code = pc->emit; +   int n; + +   NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size); + +   pc->emit_pos = 0; +   for (n = 0; n < pc->num_blocks; ++n) { +      struct nv_instruction *i; +      struct nv_basic_block *b = pc->bb_list[n]; + +      for (i = b->entry; i; i = i->next) { +         nvc0_emit_instruction(pc, i); +         pc->emit += 2; +         pc->emit_pos += 8; +      } +   } +   assert(pc->emit == &code[pc->emit_size / 4]); + +   pc->emit[0] = 0x00001de7; +   pc->emit[1] = 0x80000000; +   pc->emit_size += 8; + +   pc->emit = code; + +#ifdef NOUVEAU_DEBUG +   nvc0_pc_print_binary(pc); +#else +   debug_printf("not printing binary\n"); +#endif +   return 0; +} + +int +nvc0_generate_code(struct nvc0_translation_info *ti) +{ +   struct nv_pc *pc; +   int ret; +   int i; + +   pc = 
CALLOC_STRUCT(nv_pc); +   if (!pc) +      return 1; + +   pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT; + +   pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0])); +   if (!pc->root) { +      FREE(pc); +      return 1; +   } +   pc->num_subroutines = ti->num_subrs; + +   ret = nvc0_tgsi_to_nc(pc, ti); +   if (ret) +      goto out; +#if NOUVEAU_DEBUG > 1 +   nvc0_print_program(pc); +#endif + +   pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE; + +   /* optimization */ +   ret = nvc0_pc_exec_pass0(pc); +   if (ret) +      goto out; +#ifdef NOUVEAU_DEBUG +   nvc0_print_program(pc); +#endif + +   /* register allocation */ +   ret = nvc0_pc_exec_pass1(pc); +   if (ret) +      goto out; +#if NOUVEAU_DEBUG > 1 +   nv_print_program(pc); +   nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0); +#endif + +   /* prepare for emission */ +   ret = nvc0_pc_exec_pass2(pc); +   if (ret) +      goto out; +   assert(!(pc->emit_size % 8)); + +   pc->emit = CALLOC(pc->emit_size / 4 + 2, 4); +   if (!pc->emit) { +      ret = 3; +      goto out; +   } +   ret = nvc0_emit_program(pc); +   if (ret) +      goto out; + +   ti->prog->code = pc->emit; +   ti->prog->code_base = 0; +   ti->prog->code_size = pc->emit_size; +   ti->prog->parm_size = 0; + +   ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1); + +   ti->prog->relocs = pc->reloc_entries; +   ti->prog->num_relocs = pc->num_relocs; + +   NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? 
"failure" : "success"); + +out: +   nv_pc_free_refs(pc); + +   for (i = 0; i < pc->num_blocks; ++i) +      FREE(pc->bb_list[i]); +   if (pc->root) +      FREE(pc->root); +   if (ret) { +      /* on success, these will be referenced by struct nvc0_program */ +      if (pc->emit) +         FREE(pc->emit); +      if (pc->immd_buf) +         FREE(pc->immd_buf); +      if (pc->reloc_entries) +         FREE(pc->reloc_entries); +   } +   FREE(pc); +   return ret; +} + +static void +nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i) +{ +   if (!b->phi) { +      i->prev = NULL; +      b->phi = i; +      i->next = b->entry; +      if (b->entry) { +         assert(!b->entry->prev && b->exit); +         b->entry->prev = i; +      } else { +         b->entry = i; +         b->exit = i; +      } +   } else { +      assert(b->entry); +      if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */ +         assert(b->entry == b->exit); +         b->entry->next = i; +         i->prev = b->entry; +         b->entry = i; +         b->exit = i; +      } else { /* insert before entry */ +         assert(b->entry->prev && b->exit); +         i->next = b->entry; +         i->prev = b->entry->prev; +         b->entry->prev = i; +         i->prev->next = i; +      } +   } +} + +void +nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) +{ +   if (i->opcode == NV_OP_PHI) { +      nvbb_insert_phi(b, i); +   } else { +      i->prev = b->exit; +      if (b->exit) +         b->exit->next = i; +      b->exit = i; +      if (!b->entry) +         b->entry = i; +      else +      if (i->prev && i->prev->opcode == NV_OP_PHI) +         b->entry = i; +   } + +   i->bb = b; +   b->num_instructions++; +} + +void +nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) +{ +   if (!at->next) { +      nvc0_insn_append(at->bb, ni); +      return; +   } +   ni->next = at->next; +   ni->prev = at; +   ni->next->prev = ni; +   ni->prev->next = ni; +} + 
+void +nvc0_insn_delete(struct nv_instruction *nvi) +{ +   struct nv_basic_block *b = nvi->bb; +   int s; + +   /* debug_printf("REM: "); nv_print_instruction(nvi); */ + +   for (s = 0; s < 6 && nvi->src[s]; ++s) +      nv_reference(NULL, nvi, s, NULL); + +   if (nvi->next) +      nvi->next->prev = nvi->prev; +   else { +      assert(nvi == b->exit); +      b->exit = nvi->prev; +   } + +   if (nvi->prev) +      nvi->prev->next = nvi->next; + +   if (nvi == b->entry) { +      /* PHIs don't get hooked to b->entry */ +      b->entry = nvi->next; +      assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI); +   } + +   if (nvi == b->phi) { +      if (nvi->opcode != NV_OP_PHI) +         NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n"); + +      assert(!nvi->prev); +      if (!nvi->next || nvi->next->opcode != NV_OP_PHI) +         b->phi = NULL; +      else +         b->phi = nvi->next; +   } +} + +void +nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2) +{ +   struct nv_basic_block *b = i1->bb; + +   assert(i1->opcode != NV_OP_PHI && +          i2->opcode != NV_OP_PHI); +   assert(i1->next == i2); + +   if (b->exit == i2) +      b->exit = i1; + +   if (b->entry == i1) +      b->entry = i2; + +   i2->prev = i1->prev; +   i1->next = i2->next; +   i2->next = i1; +   i1->prev = i2; + +   if (i2->prev) +      i2->prev->next = i2; +   if (i1->next) +      i1->next->prev = i1; +} + +void +nvc0_bblock_attach(struct nv_basic_block *parent, +		   struct nv_basic_block *b, ubyte edge_kind) +{ +   assert(b->num_in < 8); + +   if (parent->out[0]) { +      assert(!parent->out[1]); +      parent->out[1] = b; +      parent->out_kind[1] = edge_kind; +   } else { +      parent->out[0] = b; +      parent->out_kind[0] = edge_kind; +   } + +   b->in[b->num_in] = parent; +   b->in_kind[b->num_in++] = edge_kind; +} + +/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */ + +boolean +nvc0_bblock_dominated_by(struct nv_basic_block *b, 
struct nv_basic_block *d) +{ +   int j; + +   if (b == d) +      return TRUE; + +   for (j = 0; j < b->num_in; ++j) +      if ((b->in_kind[j] != CFG_EDGE_BACK) && +          !nvc0_bblock_dominated_by(b->in[j], d)) +         return FALSE; + +   return j ? TRUE : FALSE; +} + +/* check if @bf (future) can be reached from @bp (past), stop at @bt */ +boolean +nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, +			 struct nv_basic_block *bt) +{ +   struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b; +   int i, p, n; + +   p = 0; +   n = 1; +   q[0] = bp; + +   while (p < n) { +      b = q[p++]; + +      if (b == bf) +         break; +      if (b == bt) +         continue; +      assert(n <= (1024 - 2)); + +      for (i = 0; i < 2; ++i) { +         if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) { +            q[n] = b->out[i]; +            q[n++]->priv = 1; +         } +      } +   } +   for (--n; n >= 0; --n) +      q[n]->priv = 0; + +   return (b == bf); +} + +static struct nv_basic_block * +nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df) +{ +   struct nv_basic_block *out; +   int i; + +   if (!nvc0_bblock_dominated_by(df, b)) { +      for (i = 0; i < df->num_in; ++i) { +         if (df->in_kind[i] == CFG_EDGE_BACK) +            continue; +         if (nvc0_bblock_dominated_by(df->in[i], b)) +            return df; +      } +   } +   for (i = 0; i < 2 && df->out[i]; ++i) { +      if (df->out_kind[i] == CFG_EDGE_BACK) +         continue; +      if ((out = nvbb_find_dom_frontier(b, df->out[i]))) +         return out; +   } +   return NULL; +} + +struct nv_basic_block * +nvc0_bblock_dom_frontier(struct nv_basic_block *b) +{ +   struct nv_basic_block *df; +   int i; + +   for (i = 0; i < 2 && b->out[i]; ++i) +      if ((df = nvbb_find_dom_frontier(b, b->out[i]))) +         return df; +   return NULL; +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h new file 
mode 100644 index 0000000000..df0314965a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -0,0 +1,648 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NVC0_COMPILER_H__ +#define __NVC0_COMPILER_H__ + +#include <stdio.h> + +#ifndef NOUVEAU_DBG +#ifdef NOUVEAU_DEBUG +# define NOUVEAU_DBG(args...) debug_printf(args); +#else +# define NOUVEAU_DBG(args...) +#endif +#endif + +#ifndef NOUVEAU_ERR +#define NOUVEAU_ERR(fmt, args...) 
\ +   fprintf(stderr, "%s:%d -  "fmt, __FUNCTION__, __LINE__, ##args); +#endif + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +/* pseudo opcodes */ +#define NV_OP_UNDEF      0 +#define NV_OP_BIND       1 +#define NV_OP_MERGE      2 +#define NV_OP_PHI        3 +#define NV_OP_SELECT     4 +#define NV_OP_NOP        5 + +/** + * BIND forces source operand i into the same register as destination operand i + * SELECT forces its multiple source operands and its destination operand into + *  one and the same register. + */ + +/* base opcodes */ +#define NV_OP_LD         6 +#define NV_OP_ST         7 +#define NV_OP_MOV        8 +#define NV_OP_AND        9 +#define NV_OP_OR        10 +#define NV_OP_XOR       11 +#define NV_OP_SHL       12 +#define NV_OP_SHR       13 +#define NV_OP_NOT       14 +#define NV_OP_SET       15 +#define NV_OP_ADD       16 +#define NV_OP_SUB       17 +#define NV_OP_MUL       18 +#define NV_OP_MAD       19 +#define NV_OP_ABS       20 +#define NV_OP_NEG       21 +#define NV_OP_MAX       22 +#define NV_OP_MIN       23 +#define NV_OP_CVT       24 +#define NV_OP_CEIL      25 +#define NV_OP_FLOOR     26 +#define NV_OP_TRUNC     27 +#define NV_OP_SAD       28 + +/* shader opcodes */ +#define NV_OP_VFETCH    29 +#define NV_OP_PFETCH    30 +#define NV_OP_EXPORT    31 +#define NV_OP_LINTERP   32 +#define NV_OP_PINTERP   33 +#define NV_OP_EMIT      34 +#define NV_OP_RESTART   35 +#define NV_OP_TEX       36 +#define NV_OP_TXB       37 +#define NV_OP_TXL       38 +#define NV_OP_TXF       39 +#define NV_OP_TXQ       40 +#define NV_OP_QUADOP    41 +#define NV_OP_DFDX      42 +#define NV_OP_DFDY      43 +#define NV_OP_KIL       44 + +/* control flow opcodes */ +#define NV_OP_BRA       45 +#define NV_OP_CALL      46 +#define NV_OP_RET       47 +#define NV_OP_EXIT      48 +#define NV_OP_BREAK     49 +#define NV_OP_BREAKADDR 50 +#define NV_OP_JOINAT    51 +#define NV_OP_JOIN      52 + +/* 
typed opcodes */ +#define NV_OP_ADD_F32   NV_OP_ADD +#define NV_OP_ADD_B32   53 +#define NV_OP_MUL_F32   NV_OP_MUL +#define NV_OP_MUL_B32   54 +#define NV_OP_ABS_F32   NV_OP_ABS +#define NV_OP_ABS_S32   55 +#define NV_OP_NEG_F32   NV_OP_NEG +#define NV_OP_NEG_S32   56 +#define NV_OP_MAX_F32   NV_OP_MAX +#define NV_OP_MAX_S32   57 +#define NV_OP_MAX_U32   58 +#define NV_OP_MIN_F32   NV_OP_MIN +#define NV_OP_MIN_S32   59 +#define NV_OP_MIN_U32   60 +#define NV_OP_SET_F32   61 +#define NV_OP_SET_S32   62 +#define NV_OP_SET_U32   63 +#define NV_OP_SAR       64 +#define NV_OP_RCP       65 +#define NV_OP_RSQ       66 +#define NV_OP_LG2       67 +#define NV_OP_SIN       68 +#define NV_OP_COS       69 +#define NV_OP_EX2       70 +#define NV_OP_PRESIN    71 +#define NV_OP_PREEX2    72 +#define NV_OP_SAT       73 + +/* newly added opcodes */ +#define NV_OP_SET_F32_AND 74 +#define NV_OP_SET_F32_OR  75 +#define NV_OP_SET_F32_XOR 76 +#define NV_OP_SELP        77 +#define NV_OP_SLCT        78 +#define NV_OP_SLCT_F32    NV_OP_SLCT +#define NV_OP_SLCT_S32    79 +#define NV_OP_SLCT_U32    80 +#define NV_OP_SUB_F32     NV_OP_SUB +#define NV_OP_SUB_S32     81 +#define NV_OP_MAD_F32     NV_OP_MAD +#define NV_OP_FSET_F32    82 + +#define NV_OP_COUNT     83 + +/* nv50 files omitted */ +#define NV_FILE_GPR      0 +#define NV_FILE_COND     1 +#define NV_FILE_PRED     2 +#define NV_FILE_IMM      16 +#define NV_FILE_MEM_S    32 +#define NV_FILE_MEM_V    34 +#define NV_FILE_MEM_A    35 +#define NV_FILE_MEM_L    48 +#define NV_FILE_MEM_G    64 +#define NV_FILE_MEM_C(i) (80 + i) + +#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S) + +#define NV_MOD_NEG 1 +#define NV_MOD_ABS 2 +#define NV_MOD_NOT 4 +#define NV_MOD_SAT 8 + +#define NV_TYPE_U8  0x00 +#define NV_TYPE_S8  0x01 +#define NV_TYPE_U16 0x02 +#define NV_TYPE_S16 0x03 +#define NV_TYPE_U32 0x04 +#define NV_TYPE_S32 0x05 +#define NV_TYPE_P32 0x07 +#define NV_TYPE_F32 0x09 +#define NV_TYPE_F64 0x0b +#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | 
(n << 4)) +#define NV_TYPE_ANY 0xff + +#define NV_TYPE_ISINT(t) ((t) < 7) +#define NV_TYPE_ISSGD(t) ((t) & 1) + +#define NV_CC_FL 0x0 +#define NV_CC_LT 0x1 +#define NV_CC_EQ 0x2 +#define NV_CC_LE 0x3 +#define NV_CC_GT 0x4 +#define NV_CC_NE 0x5 +#define NV_CC_GE 0x6 +#define NV_CC_U  0x8 +#define NV_CC_TR 0xf +#define NV_CC_O  0x10 +#define NV_CC_C  0x11 +#define NV_CC_A  0x12 +#define NV_CC_S  0x13 + +#define NV_PC_MAX_INSTRUCTIONS 2048 +#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) + +#define NV_PC_MAX_BASIC_BLOCKS 1024 + +struct nv_op_info { +   uint base;                /* e.g. ADD_S32 -> ADD */ +   char name[12]; +   uint8_t type; +   uint8_t mods; +   unsigned flow        : 1; +   unsigned commutative : 1; +   unsigned vector      : 1; +   unsigned predicate   : 1; +   unsigned pseudo      : 1; +   unsigned immediate   : 3; +   unsigned memory      : 3; +}; + +extern struct nv_op_info nvc0_op_info_table[]; + +#define NV_BASEOP(op) (nvc0_op_info_table[op].base) +#define NV_OPTYPE(op) (nvc0_op_info_table[op].type) + +static INLINE uint +nv_op_base(uint opcode) +{ +   return nvc0_op_info_table[opcode].base; +} + +static INLINE boolean +nv_is_texture_op(uint opcode) +{ +   return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ); +} + +static INLINE boolean +nv_is_vector_op(uint opcode) +{ +   return nvc0_op_info_table[opcode].vector ? TRUE : FALSE; +} + +static INLINE boolean +nv_op_commutative(uint opcode) +{ +   return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE; +} + +static INLINE uint8_t +nv_op_supported_src_mods(uint opcode) +{ +   return nvc0_op_info_table[opcode].mods; +} + +static INLINE boolean +nv_op_predicateable(uint opcode) +{ +   return nvc0_op_info_table[opcode].predicate ? 
TRUE : FALSE; +} + +static INLINE uint +nv_type_order(ubyte type) +{ +   switch (type & 0xf) { +   case NV_TYPE_U8: +   case NV_TYPE_S8: +      return 0; +   case NV_TYPE_U16: +   case NV_TYPE_S16: +      return 1; +   case NV_TYPE_U32: +   case NV_TYPE_F32: +   case NV_TYPE_S32: +   case NV_TYPE_P32: +      return 2; +   case NV_TYPE_F64: +      return 3; +   } +   assert(0); +   return 0; +} + +static INLINE uint +nv_type_sizeof(ubyte type) +{ +   if (type & 0xf0) +      return (1 << nv_type_order(type)) * (type >> 4); +   return 1 << nv_type_order(type); +} + +static INLINE uint +nv_type_sizeof_base(ubyte type) +{ +   return 1 << nv_type_order(type); +} + +struct nv_reg { +   uint32_t address; /* for memory locations */ +   int id; /* for registers */ +   ubyte file; +   ubyte size; +   union { +      int32_t s32; +      int64_t s64; +      uint64_t u64; +      uint32_t u32; +      float f32; +      double f64; +   } imm; +}; + +struct nv_range { +   struct nv_range *next; +   int bgn; +   int end; +}; + +struct nv_ref; + +struct nv_value { +   struct nv_reg reg;  +   struct nv_instruction *insn; +   struct nv_value *join; +   struct nv_ref *last_use; +   int n; +   struct nv_range *livei; +   int refc; +   struct nv_value *next; +   struct nv_value *prev; +}; + +struct nv_ref { +   struct nv_value *value; +   struct nv_instruction *insn; +   struct list_head list; /* connects uses of the same value */ +   uint8_t mod; +   uint8_t flags; +}; + +struct nv_basic_block; + +struct nv_instruction { +   struct nv_instruction *next; +   struct nv_instruction *prev; +   uint opcode; +   uint serial; + +   struct nv_value *def[5]; +   struct nv_ref *src[6]; + +   int8_t predicate; /* index of predicate src */ +   int8_t indirect;  /* index of pointer src */ + +   union { +      struct { +         uint8_t t; /* TIC binding */ +         uint8_t s; /* TSC binding */ +      } tex; +      struct { +         uint8_t d; /* output type */ +         uint8_t s; /* input type */ +  
    } cvt; +   } ext; + +   struct nv_basic_block *bb; +   struct nv_basic_block *target; /* target block of control flow insn */ + +   unsigned cc         : 5; /* condition code */ +   unsigned fixed      : 1; /* don't optimize away (prematurely) */ +   unsigned terminator : 1; +   unsigned join       : 1; +   unsigned set_cond   : 4; /* 2nd byte */ +   unsigned saturate   : 1; +   unsigned centroid   : 1; +   unsigned flat       : 1; +   unsigned patch      : 1; +   unsigned lanes      : 4; /* 3rd byte */ +   unsigned tex_argc   : 3; +   unsigned tex_live   : 1; +   unsigned tex_cube   : 1; /* 4th byte */ +   unsigned tex_mask   : 4; + +   uint8_t quadop; +}; + +static INLINE int +nvi_vector_size(struct nv_instruction *nvi) +{ +   int i; +   assert(nvi); +   for (i = 0; i < 5 && nvi->def[i]; ++i); +   return i; +} + +#define CFG_EDGE_FORWARD     0 +#define CFG_EDGE_BACK        1 +#define CFG_EDGE_LOOP_ENTER  2 +#define CFG_EDGE_LOOP_LEAVE  4 +#define CFG_EDGE_FAKE        8 + +/* 'WALL' edge means where reachability check doesn't follow */ +/* 'LOOP' edge means just having to do with loops */ +#define IS_LOOP_EDGE(k) ((k) & 7) +#define IS_WALL_EDGE(k) ((k) & 9) + +struct nv_basic_block { +   struct nv_instruction *entry; /* first non-phi instruction */ +   struct nv_instruction *exit; +   struct nv_instruction *phi; /* very first instruction */ +   int num_instructions; + +   struct nv_basic_block *out[2]; /* no indirect branches -> 2 */ +   struct nv_basic_block *in[8]; /* hope that suffices */ +   uint num_in; +   ubyte out_kind[2]; +   ubyte in_kind[8]; + +   int id; +   int subroutine; +   uint priv; /* reset to 0 after you're done */ +   uint pass_seq; + +   uint32_t emit_pos; /* position, size in emitted code (in bytes) */ +   uint32_t emit_size; + +   uint32_t live_set[NV_PC_MAX_VALUES / 32]; +}; + +struct nvc0_translation_info; + +struct nv_pc { +   struct nv_basic_block **root; +   struct nv_basic_block *current_block; +   struct nv_basic_block 
*parent_block; + +   int loop_nesting_bound; +   uint pass_seq; + +   struct nv_value values[NV_PC_MAX_VALUES]; +   struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; +   struct nv_ref **refs; +   struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS]; +   int num_values; +   int num_instructions; +   int num_refs; +   int num_blocks; +   int num_subroutines; + +   int max_reg[4]; + +   uint32_t *immd_buf; /* populated on emit */ +   unsigned immd_count; + +   uint32_t *emit; +   uint32_t emit_size; +   uint32_t emit_pos; + +   void *reloc_entries; +   unsigned num_relocs; + +   /* optimization enables */ +   boolean opt_reload_elim; +   boolean is_fragprog; +}; + +void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *); +void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *); + +static INLINE struct nv_instruction * +nv_alloc_instruction(struct nv_pc *pc, uint opcode) +{ +   struct nv_instruction *insn; + +   insn = &pc->instructions[pc->num_instructions++]; +   assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); + +   insn->opcode = opcode; +   insn->cc = 0; +   insn->indirect = -1; +   insn->predicate = -1; + +   return insn; +} + +static INLINE struct nv_instruction * +new_instruction(struct nv_pc *pc, uint opcode) +{ +   struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); + +   nvc0_insn_append(pc->current_block, insn); +   return insn; +} + +static INLINE struct nv_instruction * +new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode) +{ +   struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); + +   nvc0_insn_insert_after(at, insn); +   return insn; +} + +static INLINE struct nv_value * +new_value(struct nv_pc *pc, ubyte file, ubyte size) +{ +   struct nv_value *value = &pc->values[pc->num_values]; + +   assert(pc->num_values < NV_PC_MAX_VALUES - 1); + +   value->n = pc->num_values++; +   value->join = value; +   value->reg.id = -1; +   value->reg.file = file; +   
value->reg.size = size; +   return value; +} + +static INLINE struct nv_value * +new_value_like(struct nv_pc *pc, struct nv_value *like) +{ +   return new_value(pc, like->reg.file, like->reg.size); +} + +static INLINE struct nv_ref * +new_ref(struct nv_pc *pc, struct nv_value *val) +{ +   int i; +   struct nv_ref *ref; + +   if ((pc->num_refs % 64) == 0) { +      const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *); +      const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *); + +      pc->refs = REALLOC(pc->refs, old_size, new_size); + +      ref = CALLOC(64, sizeof(struct nv_ref)); +      for (i = 0; i < 64; ++i) +         pc->refs[pc->num_refs + i] = &ref[i]; +   } + +   ref = pc->refs[pc->num_refs++]; +   ref->value = val; + +   LIST_INITHEAD(&ref->list); + +   ++val->refc; +   return ref; +} + +static INLINE struct nv_basic_block * +new_basic_block(struct nv_pc *pc) +{ +   struct nv_basic_block *bb; + +   if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS) +      return NULL; + +   bb = CALLOC_STRUCT(nv_basic_block); + +   bb->id = pc->num_blocks; +   pc->bb_list[pc->num_blocks++] = bb; +   return bb; +} + +static INLINE void +nv_reference(struct nv_pc *pc, +             struct nv_instruction *nvi, int c, struct nv_value *s) +{ +   struct nv_ref **d = &nvi->src[c]; +   assert(c < 6); + +   if (*d) { +      --(*d)->value->refc; +      LIST_DEL(&(*d)->list); +   } + +   if (s) { +      if (!*d) { +         *d = new_ref(pc, s); +         (*d)->insn = nvi; +      } else { +         LIST_DEL(&(*d)->list); +         (*d)->value = s; +         ++(s->refc); +      } +      if (!s->last_use) +         s->last_use = *d; +      else +         LIST_ADDTAIL(&s->last_use->list, &(*d)->list); + +      s->last_use = *d; +      (*d)->insn = nvi; +   } else { +      *d = NULL; +   } +} + +/* nvc0_emit.c */ +void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *); + +/* nvc0_print.c */ +const char *nvc0_opcode_name(uint opcode); +void 
nvc0_print_instruction(struct nv_instruction *); + +/* nvc0_pc.c */ +void nvc0_print_function(struct nv_basic_block *root); +void nvc0_print_program(struct nv_pc *); + +boolean nvc0_insn_can_load(struct nv_instruction *, int s, +                           struct nv_instruction *); +boolean nvc0_insn_is_predicateable(struct nv_instruction *); + +int nvc0_insn_refcount(struct nv_instruction *); +void nvc0_insn_delete(struct nv_instruction *); +void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *); + +void nvc0_bblock_attach(struct nv_basic_block *parent, +                        struct nv_basic_block *child, ubyte edge_kind); +boolean nvc0_bblock_dominated_by(struct nv_basic_block *, +                                 struct nv_basic_block *); +boolean nvc0_bblock_reachable_by(struct nv_basic_block *future, +                                 struct nv_basic_block *past, +                                 struct nv_basic_block *final); +struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *); + +int nvc0_pc_replace_value(struct nv_pc *pc, +                          struct nv_value *old_val, +                          struct nv_value *new_val); + +struct nv_value *nvc0_pc_find_immediate(struct nv_ref *); +struct nv_value *nvc0_pc_find_constant(struct nv_ref *); + +typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b); + +void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *); + +int nvc0_pc_exec_pass0(struct nv_pc *pc); +int nvc0_pc_exec_pass1(struct nv_pc *pc); +int nvc0_pc_exec_pass2(struct nv_pc *pc); + +int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *); + +#endif // NV50_COMPILER_H diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c new file mode 100644 index 0000000000..cd1ad03b00 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -0,0 +1,960 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +#define NVC0_FIXUP_CODE_RELOC 0 +#define NVC0_FIXUP_DATA_RELOC 1 + +struct nvc0_fixup { +   uint8_t type; +   int8_t shift; +   uint32_t mask; +   uint32_t data; +   uint32_t ofst; +}; + +void +nvc0_relocate_program(struct nvc0_program *prog, +                      uint32_t code_base, +                      uint32_t data_base) +{ +   struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs; +   unsigned i; + +   for (i = 0; i < prog->num_relocs; ++i) { +      uint32_t data; + +      switch (f[i].type) { +      case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break; +      case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break; +      default: +         data = f[i].data; +         break; +      } +      data = (f[i].shift < 0) ? 
(data >> -f[i].shift) : (data << f[i].shift); + +      prog->code[f[i].ofst / 4] &= ~f[i].mask; +      prog->code[f[i].ofst / 4] |= data & f[i].mask; +   } +} + +static void +create_fixup(struct nv_pc *pc, uint8_t ty, +             int w, uint32_t data, uint32_t m, int s) +{ +   struct nvc0_fixup *f; + +   const unsigned size = sizeof(struct nvc0_fixup); +   const unsigned n = pc->num_relocs; + +   if (!(n % 8)) +      pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size); + +   f = (struct nvc0_fixup *)pc->reloc_entries; + +   f[n].ofst = pc->emit_pos + w * 4; +   f[n].type = ty; +   f[n].data = data; +   f[n].mask = m; +   f[n].shift = s; + +   ++pc->num_relocs; +} + +static INLINE ubyte +SSIZE(struct nv_instruction *nvi, int s) +{ +   return nvi->src[s]->value->reg.size; +} + +static INLINE ubyte +DSIZE(struct nv_instruction *nvi, int d) +{ +   return nvi->def[d]->reg.size; +} + +static INLINE struct nv_reg * +SREG(struct nv_ref *ref) +{ +   if (!ref) +      return NULL; +   return &ref->value->join->reg; +} + +static INLINE struct nv_reg * +DREG(struct nv_value *val) +{ +   if (!val) +      return NULL; +   return &val->join->reg; +} + +static INLINE ubyte +SFILE(struct nv_instruction *nvi, int s) +{ +   return nvi->src[s]->value->reg.file; +} + +static INLINE ubyte +DFILE(struct nv_instruction *nvi, int d) +{ +   return nvi->def[0]->reg.file; +} + +static INLINE void +SID(struct nv_pc *pc, struct nv_ref *ref, int pos) +{ +   pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32); +} + +static INLINE void +DID(struct nv_pc *pc, struct nv_value *val, int pos) +{ +   pc->emit[pos / 32] |= (DREG(val) ? 
DREG(val)->id : 63) << (pos % 32); +} + +static INLINE uint32_t +get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */ +{ +   assert(ref->value->reg.file == NV_FILE_IMM); +   return ref->value->reg.imm.u32; +} + +static INLINE void +set_immd_u32_l(struct nv_pc *pc, uint32_t u32) +{ +   pc->emit[0] |= (u32 & 0x3f) << 26; +   pc->emit[1] |= u32 >> 6; +} + +static INLINE void +set_immd_u32(struct nv_pc *pc, uint32_t u32) +{ +   if ((pc->emit[0] & 0xf) == 0x2) { +      set_immd_u32_l(pc, u32); +   } else +   if ((pc->emit[0] & 0xf) == 0x3) { +      assert(!(pc->emit[1] & 0xc000)); +      pc->emit[1] |= 0xc000; +      assert(!(u32 & 0xfff00000)); +      set_immd_u32_l(pc, u32); +   } else { +      assert(!(pc->emit[1] & 0xc000)); +      pc->emit[1] |= 0xc000; +      assert(!(u32 & 0xfff)); +      set_immd_u32_l(pc, u32 >> 12); +   } +} + +static INLINE void +set_immd(struct nv_pc *pc, struct nv_instruction *i, int s) +{ +   set_immd_u32(pc, get_immd_u32(i->src[s])); +} + +static INLINE void +DVS(struct nv_pc *pc, struct nv_instruction *i) +{ +   uint s = i->def[0]->reg.size; +   int n; +   for (n = 1; n < 4 && i->def[n]; ++n) +      s += i->def[n]->reg.size; +   pc->emit[0] |= ((s / 4) - 1) << 5; +} + +static INLINE void +SVS(struct nv_pc *pc, struct nv_ref *src) +{ +   pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5; +} + +static void +set_pred(struct nv_pc *pc, struct nv_instruction *i) +{ +   if (i->predicate >= 0) { +      SID(pc, i->src[i->predicate], 6); +      if (i->cc) +         pc->emit[0] |= 0x2000; /* negate */ +   } else { +      pc->emit[0] |= 0x1c00; +   }	    +} + +static INLINE void +set_address_16(struct nv_pc *pc, struct nv_ref *src) +{ +   pc->emit[0] |= (src->value->reg.address & 0x003f) << 26; +   pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6; +} + +static INLINE unsigned +const_space_index(struct nv_instruction *i, int s) +{ +   return SFILE(i, s) - NV_FILE_MEM_C(0); +} + +static void +emit_flow(struct nv_pc *pc, struct 
nv_instruction *i, uint8_t op) +{ +   pc->emit[0] = 0x00000007; +   pc->emit[1] = op << 24; + +   if (op == 0x40 || (op >= 0x80 && op <= 0x98)) { +      /* bra, exit, ret or kil */ +      pc->emit[0] |= 0x1e0; +      set_pred(pc, i); +   } + +   if (i->target) { +      int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8); + +      /* we will need relocations only for global functions */ +      /* +      create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000); +      create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff); +      */ + +      pc->emit[0] |= (pcrel & 0x3f) << 26; +      pc->emit[1] |= (pcrel >> 6) & 0x1ffff; +   } +} + +/* doesn't work for vfetch, export, ld, st, mov ... */ +static void +emit_form_0(struct nv_pc *pc, struct nv_instruction *i) +{ +   int s; + +   set_pred(pc, i); + +   DID(pc, i->def[0], 14); + +   for (s = 0; s < 3 && i->src[s]; ++s) { +      if (SFILE(i, s) >= NV_FILE_MEM_C(0) && +          SFILE(i, s) <= NV_FILE_MEM_C(15)) { +         assert(!(pc->emit[1] & 0xc000)); +         assert(s <= 1); +         pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); +         set_address_16(pc, i->src[s]); +      } else +      if (SFILE(i, s) == NV_FILE_GPR) { +         SID(pc, i->src[s], s ? ((s == 2) ? 
49 : 26) : 20); +      } else +      if (SFILE(i, s) == NV_FILE_IMM) { +         assert(!(pc->emit[1] & 0xc000)); +         assert(s == 1 || i->opcode == NV_OP_MOV); +         set_immd(pc, i, s); +      } +   } +} + +static void +emit_form_1(struct nv_pc *pc, struct nv_instruction *i) +{ +   int s; + +   set_pred(pc, i); + +   DID(pc, i->def[0], 14); + +   for (s = 0; s < 1 && i->src[s]; ++s) { +      if (SFILE(i, s) >= NV_FILE_MEM_C(0) && +          SFILE(i, s) <= NV_FILE_MEM_C(15)) { +         assert(!(pc->emit[1] & 0xc000)); +         assert(s <= 1); +         pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); +         set_address_16(pc, i->src[s]); +      } else +      if (SFILE(i, s) == NV_FILE_GPR) { +         SID(pc, i->src[s], 26); +      } else +      if (SFILE(i, s) == NV_FILE_IMM) { +         assert(!(pc->emit[1] & 0xc000)); +         assert(s == 1 || i->opcode == NV_OP_MOV); +         set_immd(pc, i, s); +      } +   } +} + +static void +emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i) +{ +   if (i->src[0]->mod & NV_MOD_ABS) +      pc->emit[0] |= 1 << 7; +   if (i->src[0]->mod & NV_MOD_NEG) +      pc->emit[0] |= 1 << 9; +   if (i->src[1]->mod & NV_MOD_ABS) +      pc->emit[0] |= 1 << 6; +   if (i->src[1]->mod & NV_MOD_NEG) +      pc->emit[0] |= 1 << 8; +} + +static void +emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; +   pc->emit[1] = 0x50000000; + +   emit_form_0(pc, i); + +   emit_neg_abs_1_2(pc, i); + +   if (i->saturate) +      pc->emit[1] |= 1 << 17; +} + +static void +emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; +   pc->emit[1] = 0x58000000; + +   emit_form_0(pc, i); + +   if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) +      pc->emit[1] |= 1 << 25; + +   if (i->saturate) +      pc->emit[0] |= 1 << 5; +} + +static void +emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; +   pc->emit[1] = 0x30000000; 
+ +   emit_form_0(pc, i); + +   if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) +      pc->emit[0] |= 1 << 9; + +   if (i->src[2]->mod & NV_MOD_NEG) +      pc->emit[0] |= 1 << 8; + +   if (i->saturate) +      pc->emit[0] |= 1 << 5; +} + +static void +emit_minmax(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; +   pc->emit[1] = 0x08000000; + +   if (NV_BASEOP(i->opcode) == NV_OP_MAX) +      pc->emit[1] |= 0x001e0000; +   else +      pc->emit[1] |= 0x000e0000; /* predicate ? */ + +   emit_form_0(pc, i); + +   emit_neg_abs_1_2(pc, i); + +   switch (i->opcode) { +   case NV_OP_MIN_U32: +   case NV_OP_MAX_U32: +      pc->emit[0] |= 3; +      break; +   case NV_OP_MIN_S32: +   case NV_OP_MAX_S32: +      pc->emit[0] |= 3 | (1 << 5); +      break; +   case NV_OP_MIN_F32: +   case NV_OP_MAX_F32: +   default: +      break; +   } +} + +static void +emit_tex(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000086; +   pc->emit[1] = 0x80000000; + +   if (i->opcode == NV_OP_TXB) pc->emit[1] |= 0x04000000; +   else +   if (i->opcode == NV_OP_TXL) pc->emit[1] |= 0x06000000; + +   set_pred(pc, i); + +   if (1) +      pc->emit[0] |= 63 << 26; /* explicit derivatives */ + +   DID(pc, i->def[0], 14); +   SID(pc, i->src[0], 20); + +   pc->emit[1] |= i->tex_mask << 14; +   pc->emit[1] |= (i->tex_argc - 1) << 20; + +   assert(i->ext.tex.s < 16); + +   pc->emit[1] |= i->ext.tex.t; +   pc->emit[1] |= i->ext.tex.s << 8; + +   if (i->tex_live) +      pc->emit[0] |= 1 << 9; +} + +/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */ +static void +emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) +{ +   pc->emit[0] = 0x00000000; +   pc->emit[1] = 0xc8000000; + +   set_pred(pc, i); + +   DID(pc, i->def[0], 14); +   SID(pc, i->src[0], 20); + +   pc->emit[0] |= op << 26; + +   if (op >= 4) { +      if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; +      if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; +   } else { + 
     assert(!i->src[0]->mod); +   } +} + +static void +emit_quadop(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; +   pc->emit[1] = 0x48000000; + +   set_pred(pc, i); + +   assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR); + +   DID(pc, i->def[0], 14); +   SID(pc, i->src[0], 20); +   SID(pc, i->src[0], 26); + +   pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */ +   pc->emit[1] |= i->quadop; +} + +static void +emit_ddx(struct nv_pc *pc, struct nv_instruction *i) +{ +   i->quadop = 0x99; +   i->lanes = 4; +   emit_quadop(pc, i); +} + +static void +emit_ddy(struct nv_pc *pc, struct nv_instruction *i) +{ +   i->quadop = 0xa5; +   i->lanes = 5; +   emit_quadop(pc, i); +} + +/* preparation op (preex2, presin / convert to fixed point) */ +static void +emit_preop(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; +   pc->emit[1] = 0x60000000; + +   if (i->opcode == NV_OP_PREEX2) +      pc->emit[0] |= 0x20; + +   emit_form_1(pc, i); + +   if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8; +   if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6; +} + +static void +emit_shift(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000003; + +   switch (i->opcode) { +   case NV_OP_SAR: +      pc->emit[0] |= 0x20; /* fall through */ +   case NV_OP_SHR: +      pc->emit[1] = 0x58000000; +      break; +   case NV_OP_SHL: +   default: +      pc->emit[1] = 0x60000000; +      break; +   } + +   emit_form_0(pc, i); +} + +static void +emit_bitop(struct nv_pc *pc, struct nv_instruction *i) +{ +   if (SFILE(i, 1) == NV_FILE_IMM) { +      pc->emit[0] = 0x00000002; +      pc->emit[1] = 0x38000000; +   } else { +      pc->emit[0] = 0x00000003; +      pc->emit[1] = 0x68000000; +   } +    +   switch (i->opcode) { +   case NV_OP_OR: +      pc->emit[0] |= 0x40; +      break; +   case NV_OP_XOR: +      pc->emit[0] |= 0x80; +      break; +   case NV_OP_AND: +   default: +      break; +   } + +   
emit_form_0(pc, i); +} + +static void +emit_set(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; + +   switch (i->opcode) { +   case NV_OP_SET_S32: +      pc->emit[0] |= 0x20; /* fall through */ +   case NV_OP_SET_U32: +      pc->emit[0] |= 0x3; +      pc->emit[1] = 0x100e0000; +      break; +   case NV_OP_SET_F32_AND: +      pc->emit[1] = 0x18000000; +      break; +   case NV_OP_SET_F32_OR: +      pc->emit[1] = 0x18200000; +      break; +   case NV_OP_SET_F32_XOR: +      pc->emit[1] = 0x18400000; +      break; +   case NV_OP_FSET_F32: +      pc->emit[0] |= 0x20; /* fall through */ +   case NV_OP_SET_F32: +   default: +      pc->emit[1] = 0x180e0000; +      break; +   } + +   if (DFILE(i, 0) == NV_FILE_PRED) { +      pc->emit[0] |= 0x1c000; +      pc->emit[1] += 0x08000000; +   } + +   pc->emit[1] |= i->set_cond << 23; + +   emit_form_0(pc, i); + +   emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */ +} + +static void +emit_selp(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000004; +   pc->emit[1] = 0x20000000; + +   emit_form_0(pc, i); + +   if (i->cc || (i->src[2]->mod & NV_MOD_NOT)) +      pc->emit[1] |= 1 << 20; +} + +static void +emit_slct(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; + +   switch (i->opcode) { +   case NV_OP_SLCT_S32: +      pc->emit[0] |= 0x20; /* fall through */ +   case NV_OP_SLCT_U32: +      pc->emit[0] |= 0x3; +      pc->emit[1] = 0x30000000; +      break; +   case NV_OP_SLCT_F32: +   default: +      pc->emit[1] = 0x38000000; +      break; +   } + +   emit_form_0(pc, i); + +   pc->emit[1] |= i->set_cond << 23; +} + +static void +emit_cvt(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000004; +   pc->emit[1] = 0x10000000; + +   if (i->opcode != NV_OP_CVT) +      i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); + +   switch (i->ext.cvt.d) { +   case NV_TYPE_F32: +      switch (i->ext.cvt.s) { +      case 
NV_TYPE_F32: pc->emit[1] = 0x10000000; break; +      case NV_TYPE_S32: pc->emit[0] |= 0x200; +      case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; +      } +      break; +   case NV_TYPE_S32: pc->emit[0] |= 0x80; +   case NV_TYPE_U32: +      switch (i->ext.cvt.s) { +      case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; +      case NV_TYPE_S32: pc->emit[0] |= 0x200; +      case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; +      } +      break; +   default: +      assert(!"cvt: unknown type"); +      break; +   } + +   if (i->opcode == NV_OP_FLOOR) +      pc->emit[1] |= 0x00020000; +   else +   if (i->opcode == NV_OP_CEIL) +      pc->emit[1] |= 0x00040000; +   else +   if (i->opcode == NV_OP_TRUNC) +      pc->emit[1] |= 0x00060000; + +   if (i->saturate || i->opcode == NV_OP_SAT) +      pc->emit[0] |= 0x20; + +   if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS) +      pc->emit[0] |= 1 << 6; +   if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG) +      pc->emit[0] |= 1 << 8; + +   pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20; +   pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23; + +   emit_form_1(pc, i); +} + +static void +emit_interp(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000000; +   pc->emit[1] = 0xc07e0000; + +   DID(pc, i->def[0], 14); + +   set_pred(pc, i); + +   if (i->indirect) +      SID(pc, i->src[i->indirect], 20); +   else +      SID(pc, NULL, 20); + +   if (i->opcode == NV_OP_PINTERP) { +      pc->emit[0] |= 0x040; +      SID(pc, i->src[1], 26); +   } else { +      SID(pc, NULL, 26); +   } + +   pc->emit[1] |= i->src[0]->value->reg.address & 0xffff; + +   if (i->centroid) +      pc->emit[0] |= 0x100; +   else +   if (i->flat) +      pc->emit[0] |= 0x080; +} + +static void +emit_vfetch(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x03f00006; +   pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address; +   if (i->patch) +      pc->emit[0] 
|= 0x100; + +   set_pred(pc, i); + +   DVS(pc, i); +   DID(pc, i->def[0], 14); + +   SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26); +} + +static void +emit_export(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000006; +   pc->emit[1] = 0x0a000000; +   if (i->patch) +      pc->emit[0] |= 0x100; + +   set_pred(pc, i); + +   assert(SFILE(i, 0) == NV_FILE_MEM_V); +   assert(SFILE(i, 1) == NV_FILE_GPR); + +   SID(pc, i->src[1], 26); /* register source */ +   SVS(pc, i->src[0]); + +   pc->emit[1] |= i->src[0]->value->reg.address & 0xfff; + +   SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); +} + +static void +emit_mov(struct nv_pc *pc, struct nv_instruction *i) +{ +   if (i->opcode == NV_OP_MOV) +      i->lanes = 0xf; + +   if (SFILE(i, 0) == NV_FILE_IMM) { +      pc->emit[0] = 0x000001e2; +      pc->emit[1] = 0x18000000; +   } else +   if (SFILE(i, 0) == NV_FILE_PRED) { +      pc->emit[0] = 0x1c000004; +      pc->emit[1] = 0x080e0000; +   } else { +      pc->emit[0] = 0x00000004 | (i->lanes << 5); +      pc->emit[1] = 0x28000000; +   } + +   emit_form_1(pc, i); +} + +static void +emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) +{ +   assert(NV_IS_MEMORY_FILE(SFILE(i, 0))); + +   switch (SSIZE(i, 0)) { +   case 1: +      if (NV_TYPE_ISSGD(i->ext.cvt.s)) +         pc->emit[0] |= 0x20; +      break; +   case 2: +      pc->emit[0] |= 0x40; +      if (NV_TYPE_ISSGD(i->ext.cvt.s)) +         pc->emit[0] |= 0x20; +      break; +   case 4: pc->emit[0] |= 0x80; break; +   case 8: pc->emit[0] |= 0xa0; break; +   case 16: pc->emit[0] |= 0xc0; break; +   default: +      NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0)); +      break; +   } +} + +static void +emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +{ +   pc->emit[0] = 0x00000006; +   pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); + +   emit_ldst_size(pc, i); + +   set_pred(pc, i); +   set_address_16(pc, i->src[0]); + +   SID(pc, 
(i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); +   DID(pc, i->def[0], 14); +} + +static void +emit_ld(struct nv_pc *pc, struct nv_instruction *i) +{ +   if (SFILE(i, 0) >= NV_FILE_MEM_C(0) && +       SFILE(i, 0) <= NV_FILE_MEM_C(15)) { +      emit_ld_const(pc, i); +   } else { +      NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); +      abort(); +   } +} + +static void +emit_st(struct nv_pc *pc, struct nv_instruction *i) +{ +   NOUVEAU_ERR("emit_st: not handled yet\n"); +   abort(); +} + +void +nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) +{ +   debug_printf("EMIT: "); nvc0_print_instruction(i); + +   switch (i->opcode) { +   case NV_OP_VFETCH: +      emit_vfetch(pc, i); +      break; +   case NV_OP_EXPORT: +      if (!pc->is_fragprog) +         emit_export(pc, i); +      break; +   case NV_OP_MOV: +      emit_mov(pc, i); +      break; +   case NV_OP_LD: +      emit_ld(pc, i); +      break; +   case NV_OP_ST: +      emit_st(pc, i); +      break; +   case NV_OP_LINTERP: +   case NV_OP_PINTERP: +      emit_interp(pc, i); +      break; +   case NV_OP_ADD_F32: +      emit_add_f32(pc, i); +      break; +   case NV_OP_AND: +   case NV_OP_OR: +   case NV_OP_XOR: +      emit_bitop(pc, i); +      break; +   case NV_OP_CVT: +   case NV_OP_ABS_F32: +   case NV_OP_ABS_S32: +   case NV_OP_NEG_F32: +   case NV_OP_NEG_S32: +   case NV_OP_SAT: +   case NV_OP_CEIL: +   case NV_OP_FLOOR: +   case NV_OP_TRUNC: +      emit_cvt(pc, i); +      break; +   case NV_OP_DFDX: +      emit_ddx(pc, i); +      break; +   case NV_OP_DFDY: +      emit_ddy(pc, i); +      break; +   case NV_OP_COS: +      emit_flop(pc, i, 0); +      break; +   case NV_OP_SIN: +      emit_flop(pc, i, 1); +      break; +   case NV_OP_EX2: +      emit_flop(pc, i, 2); +      break; +   case NV_OP_LG2: +      emit_flop(pc, i, 3); +      break; +   case NV_OP_RCP: +      emit_flop(pc, i, 4); +      break; +   case NV_OP_RSQ: +      emit_flop(pc, i, 5); +      break; +   case 
NV_OP_PRESIN: +   case NV_OP_PREEX2: +      emit_preop(pc, i); +      break; +   case NV_OP_MAD_F32: +      emit_mad_f32(pc, i); +      break; +   case NV_OP_MAX_F32: +   case NV_OP_MAX_S32: +   case NV_OP_MAX_U32: +   case NV_OP_MIN_F32: +   case NV_OP_MIN_S32: +   case NV_OP_MIN_U32: +      emit_minmax(pc, i); +      break; +   case NV_OP_MUL_F32: +      emit_mul_f32(pc, i); +      break; +   case NV_OP_SET_F32: +   case NV_OP_SET_F32_AND: +   case NV_OP_SET_F32_OR: +   case NV_OP_SET_F32_XOR: +   case NV_OP_SET_S32: +   case NV_OP_SET_U32: +   case NV_OP_FSET_F32: +      emit_set(pc, i); +      break; +   case NV_OP_SHL: +   case NV_OP_SHR: +   case NV_OP_SAR: +      emit_shift(pc, i); +      break; +   case NV_OP_TEX: +   case NV_OP_TXB: +   case NV_OP_TXL: +      emit_tex(pc, i); +      break; +   case NV_OP_BRA: +      emit_flow(pc, i, 0x40); +      break; +   case NV_OP_CALL: +      emit_flow(pc, i, 0x50); +      break; +   case NV_OP_JOINAT: +      emit_flow(pc, i, 0x60); +      break; +   case NV_OP_EXIT: +      emit_flow(pc, i, 0x80); +      break; +   case NV_OP_RET: +      emit_flow(pc, i, 0x90); +      break; +   case NV_OP_KIL: +      emit_flow(pc, i, 0x98); +      break; +   case NV_OP_JOIN: +   case NV_OP_NOP: +      pc->emit[0] = 0x00003de4; +      pc->emit[1] = 0x40000000; +      break; +   case NV_OP_SELP: +      emit_selp(pc, i); +      break; +   case NV_OP_SLCT_F32: +   case NV_OP_SLCT_S32: +   case NV_OP_SLCT_U32: +      emit_slct(pc, i); +      break; +   default: +      NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode); +      abort(); +      break; +   } + +   if (i->join) +      pc->emit[0] |= 0x10; +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c new file mode 100644 index 0000000000..e9e387632b --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -0,0 +1,1174 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any 
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +#define DESCEND_ARBITRARY(j, f)                                 \ +do {                                                            \ +   b->pass_seq = ctx->pc->pass_seq;                             \ +                                                                \ +   for (j = 0; j < 2; ++j)                                      \ +      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \ +         f(ctx, b->out[j]);	                                  \ +} while (0) + +static INLINE boolean +registers_interfere(struct nv_value *a, struct nv_value *b) +{ +   if (a->reg.file != b->reg.file) +      return FALSE; +   if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file)) +      return FALSE; + +   assert(a->join->reg.id >= 0 && b->join->reg.id >= 0); + +   if (a->join->reg.id < b->join->reg.id) { +      return (a->join->reg.id + a->reg.size >= b->join->reg.id); +   } else +   if (a->join->reg.id > b->join->reg.id) { +      return (b->join->reg.id + b->reg.size >= a->join->reg.id); +   } + +   return FALSE; +} + +static INLINE boolean +values_equal(struct nv_value *a, struct nv_value *b) +{ +   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) +      return FALSE; +   if (NV_IS_MEMORY_FILE(a->reg.file)) +      return a->reg.address == b->reg.address; +   else +      return a->join->reg.id == b->join->reg.id; +} + +#if 0 +static INLINE boolean +inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b) +{ +   int si, di; + +   for (di = 0; di < 4 && a->def[di]; ++di) +      for (si = 0; si < 5 && b->src[si]; ++si) +         if (registers_interfere(a->def[di], b->src[si]->value)) +            return FALSE; + +   return TRUE; +} + +/* Check whether we can swap the order of the instructions, + * where a & b may be either the earlier or the later one. 
+ */ +static boolean +inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b) +{ +   return inst_commutation_check(a, b) && inst_commutation_check(b, a); +} +#endif + +static INLINE boolean +inst_removable(struct nv_instruction *nvi) +{ +   if (nvi->opcode == NV_OP_ST) +      return FALSE; +   return (!(nvi->terminator || +             nvi->join || +             nvi->target || +             nvi->fixed || +             nvc0_insn_refcount(nvi))); +} + +static INLINE boolean +inst_is_noop(struct nv_instruction *nvi) +{ +   if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND) +      return TRUE; +   if (nvi->terminator || nvi->join) +      return FALSE; +   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0) +      return TRUE; +   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT) +      return FALSE; +   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file) +      return FALSE; + +   if (nvi->src[0]->value->join->reg.id < 0) { +      NOUVEAU_DBG("inst_is_noop: orphaned value detected\n"); +      return TRUE; +   } + +   if (nvi->opcode == NV_OP_SELECT) +      if (!values_equal(nvi->def[0], nvi->src[1]->value)) +         return FALSE; +   return values_equal(nvi->def[0], nvi->src[0]->value); +} + +struct nv_pass { +   struct nv_pc *pc; +   int n; +   void *priv; +}; + +static int +nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b); + +static void +nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) +{ +   struct nv_pc *pc = (struct nv_pc *)priv; +   struct nv_basic_block *in; +   struct nv_instruction *nvi, *next; +   int j; + +   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); + +   if (j >= 0) { +      in = pc->bb_list[j]; + +      /* check for no-op branches (BRA $PC+8) */ +      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) { +         in->emit_size -= 8; +         pc->emit_size -= 8; + +         for (++j; j < pc->num_blocks; ++j) +            
pc->bb_list[j]->emit_pos -= 8; + +         nvc0_insn_delete(in->exit); +      } +      b->emit_pos = in->emit_pos + in->emit_size; +   } + +   pc->bb_list[pc->num_blocks++] = b; + +   /* visit node */ + +   for (nvi = b->entry; nvi; nvi = next) { +      next = nvi->next; +      if (inst_is_noop(nvi) || +          (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) { +         nvc0_insn_delete(nvi); +      } else +         b->emit_size += 8; +   } +   pc->emit_size += b->emit_size; + +#ifdef NOUVEAU_DEBUG +   if (!b->entry) +      debug_printf("BB:%i is now empty\n", b->id); +   else +      debug_printf("BB:%i size = %u\n", b->id, b->emit_size); +#endif +} + +static int +nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root) +{ +   struct nv_pass pass; + +   pass.pc = pc; + +   pc->pass_seq++; +   nv_pass_flatten(&pass, root); + +   nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc); + +   return 0; +} + +int +nvc0_pc_exec_pass2(struct nv_pc *pc) +{ +   int i, ret; + +   NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks); + +   pc->num_blocks = 0; /* will reorder bb_list */ + +   for (i = 0; i < pc->num_subroutines + 1; ++i) +      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i]))) +         return ret; +   return 0; +} + +static INLINE boolean +is_cspace_load(struct nv_instruction *nvi) +{ +   if (!nvi) +      return FALSE; +   assert(nvi->indirect != 0); +   return (nvi->opcode == NV_OP_LD && +           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && +           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15)); +} + +static INLINE boolean +is_immd32_load(struct nv_instruction *nvi) +{ +   if (!nvi) +      return FALSE; +   return (nvi->opcode == NV_OP_MOV && +           nvi->src[0]->value->reg.file == NV_FILE_IMM && +           nvi->src[0]->value->reg.size == 4); +} + +static INLINE void +check_swap_src_0_1(struct nv_instruction *nvi) +{ +   static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + +   struct nv_ref 
*src0 = nvi->src[0]; +   struct nv_ref *src1 = nvi->src[1]; + +   if (!nv_op_commutative(nvi->opcode)) +      return; +   assert(src0 && src1 && src0->value && src1->value); + +   if (is_cspace_load(src0->value->insn)) { +      if (!is_cspace_load(src1->value->insn)) { +         nvi->src[0] = src1; +         nvi->src[1] = src0; +      } +   } + +   if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET) +      nvi->set_cond = cc_swapped[nvi->set_cond]; +} + +static void +nvi_set_indirect_load(struct nv_pc *pc, +                      struct nv_instruction *nvi, struct nv_value *val) +{ +   for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect]; +        ++nvi->indirect); +   assert(nvi->indirect < 6); +   nv_reference(pc, nvi, nvi->indirect, val); +} + +static int +nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) +{ +   struct nv_instruction *nvi, *ld; +   int s; + +   for (nvi = b->entry; nvi; nvi = nvi->next) { +      check_swap_src_0_1(nvi); + +      for (s = 0; s < 3 && nvi->src[s]; ++s) { +         ld = nvi->src[s]->value->insn; +         if (!ld || ld->opcode != NV_OP_LD) +            continue; +         if (!nvc0_insn_can_load(nvi, s, ld)) +            continue; + +         /* fold it ! */ +         nv_reference(ctx->pc, nvi, s, ld->src[0]->value); +         if (ld->indirect >= 0) +            nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value); + +         if (!nvc0_insn_refcount(ld)) +            nvc0_insn_delete(ld); +      } +   } +   DESCEND_ARBITRARY(s, nvc0_pass_fold_loads); + +   return 0; +} + +static INLINE uint +modifiers_opcode(uint8_t mod) +{ +   switch (mod) { +   case NV_MOD_NEG: return NV_OP_NEG; +   case NV_MOD_ABS: return NV_OP_ABS; +   case 0: +      return NV_OP_MOV; +   default: +      return NV_OP_NOP; +   } +} + +/* NOTE: Assumes loads have not yet been folded. 
*/
/* Fold NEG/ABS/SAT helper instructions into their users.
 *
 * For every instruction in @b:
 *  - SUB is rewritten as ADD with the NEG modifier toggled on source 1.
 *  - For each of the first three sources whose producer is a NEG or ABS
 *    instruction (result referenced once, not predicated), the producer's
 *    effect is merged into the source modifier, provided the consuming
 *    opcode supports that modifier (nv_op_supported_src_mods).
 *  - A SAT whose source is produced by an ADD/MUL/MAD with a single-use
 *    result is replaced by setting the producer's saturate flag and handing
 *    it the SAT's destination value.
 *
 * Returns 0.
 */
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *mi, *next;
   int j;
   uint8_t mod;

   for (nvi = b->entry; nvi; nvi = next) {
      /* nvi may be deleted below, so fetch its successor first */
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->src[1]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD;
      }

      for (j = 0; j < 3 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1 || mi->predicate >= 0)
            continue;

         if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
            continue;

         nv_reference(ctx->pc, nvi, j, mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         /* A value need not have a defining instruction (the modifier loop
          * above checks for this too); without a producer there is nothing
          * to fold the saturate into.
          */
         if (!mi ||
             mi->def[0]->refc > 1 ||
             (mi->opcode != NV_OP_ADD &&
              mi->opcode != NV_OP_MUL &&
              mi->opcode != NV_OP_MAD))
            continue;
         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
       nvc0_insn_delete(nvi); +      } +   } +   DESCEND_ARBITRARY(j, nv_pass_lower_mods); + +   return 0; +} + +#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) + +/* +static void +modifiers_apply(uint32_t *val, ubyte type, ubyte mod) +{ +   if (mod & NV_MOD_ABS) { +      if (type == NV_TYPE_F32) +         *val &= 0x7fffffff; +      else +      if ((*val) & (1 << 31)) +         *val = ~(*val) + 1; +   } +   if (mod & NV_MOD_NEG) { +      if (type == NV_TYPE_F32) +         *val ^= 0x80000000; +      else +         *val = ~(*val) + 1; +   } +} +*/ + +#if 0 +static void +constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, +                    struct nv_value *src0, struct nv_value *src1) +{ +   struct nv_value *val; +   union { +      float f32; +      uint32_t u32; +      int32_t s32; +   } u0, u1, u; +   ubyte type; + +   if (!nvi->def[0]) +      return; +   type = NV_OPTYPE(nvi->opcode); + +   u.u32 = 0; +   u0.u32 = src0->reg.imm.u32; +   u1.u32 = src1->reg.imm.u32; + +   modifiers_apply(&u0.u32, type, nvi->src[0]->mod); +   modifiers_apply(&u1.u32, type, nvi->src[1]->mod); + +   switch (nvi->opcode) { +   case NV_OP_MAD: +      if (nvi->src[2]->value->reg.file != NV_FILE_GPR) +         return; +      /* fall through */ +   case NV_OP_MUL: +      switch (type) { +      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break; +      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break; +      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break; +      default: +         assert(0); +         break; +      } +      break; +   case NV_OP_ADD: +      switch (type) { +      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break; +      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break; +      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break; +      default: +         assert(0); +         break; +      } +      break; +   case NV_OP_SUB: +      switch (type) { +      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break; +      case NV_TYPE_U32: u.u32 = u0.u32 - 
u1.u32; break; +      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break; +      default: +         assert(0); +         break; +      } +      break; +   default: +      return; +   } + +   nvi->opcode = NV_OP_MOV; + +   val = new_value(pc, NV_FILE_IMM, type); + +   val->reg.imm.u32 = u.u32; + +   nv_reference(pc, nvi, 1, NULL); +   nv_reference(pc, nvi, 0, val); + +   if (nvi->src[2]) { /* from MAD */ +      nvi->src[1] = nvi->src[0]; +      nvi->src[0] = nvi->src[2]; +      nvi->src[2] = NULL; +      nvi->opcode = NV_OP_ADD; + +      if (val->reg.imm.u32 == 0) { +         nvi->src[1] = NULL; +         nvi->opcode = NV_OP_MOV; +      } +   } +} + +static void +constant_operand(struct nv_pc *pc, +                 struct nv_instruction *nvi, struct nv_value *val, int s) +{ +   union { +      float f32; +      uint32_t u32; +      int32_t s32; +   } u; +   int t = s ? 0 : 1; +   uint op; +   ubyte type; + +   if (!nvi->def[0]) +      return; +   type = NV_OPTYPE(nvi->opcode); + +   u.u32 = val->reg.imm.u32; +   modifiers_apply(&u.u32, type, nvi->src[s]->mod); + +   switch (NV_BASEOP(nvi->opcode)) { +   case NV_OP_MUL: +      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) || +          (NV_TYPE_ISINT(type) && u.u32 == 1)) { +         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP) +            break; +         nvi->opcode = op; +         nv_reference(pc, nvi, s, NULL); +         nvi->src[0] = nvi->src[t]; +         nvi->src[1] = NULL; +      } else +      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) || +          (NV_TYPE_ISINT(type) && u.u32 == 2)) { +         nvi->opcode = NV_OP_ADD; +         nv_reference(pc, nvi, s, nvi->src[t]->value); +         nvi->src[s]->mod = nvi->src[t]->mod; +      } else +      if (type == NV_TYPE_F32 && u.f32 == -1.0f) { +         if (nvi->src[t]->mod & NV_MOD_NEG) +            nvi->opcode = NV_OP_MOV; +         else +            nvi->opcode = NV_OP_NEG; +         nv_reference(pc, nvi, s, NULL); +         nvi->src[0] = nvi->src[t]; 
+         nvi->src[1] = NULL; +      } else +      if (type == NV_TYPE_F32 && u.f32 == -2.0f) { +         nvi->opcode = NV_OP_ADD; +         nv_reference(pc, nvi, s, nvi->src[t]->value); +         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG); +      } else +      if (u.u32 == 0) { +         nvi->opcode = NV_OP_MOV; +         nv_reference(pc, nvi, t, NULL); +         if (s) { +            nvi->src[0] = nvi->src[1]; +            nvi->src[1] = NULL; +         } +      } +      break; +   case NV_OP_ADD: +      if (u.u32 == 0) { +         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP) +            break; +         nvi->opcode = op; +         nv_reference(pc, nvi, s, NULL); +         nvi->src[0] = nvi->src[t]; +         nvi->src[1] = NULL; +      } +      break; +   case NV_OP_RCP: +      u.f32 = 1.0f / u.f32; +      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; +      nvi->opcode = NV_OP_MOV; +      assert(s == 0); +      nv_reference(pc, nvi, 0, val); +      break; +   case NV_OP_RSQ: +      u.f32 = 1.0f / sqrtf(u.f32); +      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; +      nvi->opcode = NV_OP_MOV; +      assert(s == 0); +      nv_reference(pc, nvi, 0, val); +      break; +   default: +      break; +   } +} +#endif + +static int +nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) +{ +#if 0 +   struct nv_instruction *nvi, *next; +   int j; + +   for (nvi = b->entry; nvi; nvi = next) { +      struct nv_value *src0, *src1, *src; +      int mod; + +      next = nvi->next; + +      src0 = nvcg_find_immediate(nvi->src[0]); +      src1 = nvcg_find_immediate(nvi->src[1]); + +      if (src0 && src1) +         constant_expression(ctx->pc, nvi, src0, src1); +      else { +         if (src0) +            constant_operand(ctx->pc, nvi, src0, 0); +         else +         if (src1) +            constant_operand(ctx->pc, nvi, src1, 1); +      } + +      /* try to combine MUL, ADD into MAD */ +    
  if (nvi->opcode != NV_OP_ADD) +         continue; + +      src0 = nvi->src[0]->value; +      src1 = nvi->src[1]->value; + +      if (SRC_IS_MUL(src0) && src0->refc == 1) +         src = src0; +      else +      if (SRC_IS_MUL(src1) && src1->refc == 1) +         src = src1; +      else +         continue; + +      /* could have an immediate from above constant_*  */ +      if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) +         continue; + +      nvi->opcode = NV_OP_MAD; +      mod = nvi->src[(src == src0) ? 0 : 1]->mod; +      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL); +      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0]; + +      assert(!(mod & ~NV_MOD_NEG)); +      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); +      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); +      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod; +      nvi->src[1]->mod = src->insn->src[1]->mod; +   } +   DESCEND_ARBITRARY(j, nv_pass_lower_arith); +#endif +   return 0; +} + +/* TODO: redundant store elimination */ + +struct mem_record { +   struct mem_record *next; +   struct nv_instruction *insn; +   uint32_t ofst; +   uint32_t base; +   uint32_t size; +}; + +#define MEM_RECORD_POOL_SIZE 1024 + +struct pass_reld_elim { +   struct nv_pc *pc; + +   struct mem_record *imm; +   struct mem_record *mem_v; +   struct mem_record *mem_a; +   struct mem_record *mem_c[16]; +   struct mem_record *mem_l; + +   struct mem_record pool[MEM_RECORD_POOL_SIZE]; +   int alloc; +}; + +static void +combine_load(struct mem_record *rec, struct nv_instruction *ld) +{ +   struct nv_instruction *fv = rec->insn; +   struct nv_value *mem = ld->src[0]->value; +   uint32_t size = rec->size + mem->reg.size; +   int j; +   int d = rec->size / 4; + +   assert(rec->size < 16); +   if (rec->ofst > mem->reg.address) { +      if ((size == 8 && mem->reg.address & 3) || +          (size > 8 && mem->reg.address & 7)) +         return; +      rec->ofst = 
mem->reg.address; +      for (j = 0; j < d; ++j) +         fv->def[d + j] = fv->def[j]; +      d = 0; +   } else +   if ((size == 8 && rec->ofst & 3) || +       (size > 8 && rec->ofst & 7)) { +      return; +   } + +   for (j = 0; j < mem->reg.size / 4; ++j) { +      fv->def[d] = ld->def[j]; +      fv->def[d++]->insn = fv; +   } + +   fv->src[0]->value->reg.size = rec->size = size; + +   nvc0_insn_delete(ld); +} + +static void +combine_export(struct mem_record *rec, struct nv_instruction *ex) +{ + +} + +static INLINE void +add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec, +               uint32_t base, uint32_t ofst, struct nv_instruction *nvi) +{ +   struct mem_record *it = &ctx->pool[ctx->alloc++]; + +   it->next = *rec; +   *rec = it; +   it->base = base; +   it->ofst = ofst; +   it->insn = nvi; +   it->size = nvi->src[0]->value->reg.size; +} + +/* vectorize and reuse loads from memory or of immediates */ +static int +nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ +   struct mem_record **rec, *it; +   struct nv_instruction *ld, *next; +   struct nv_value *mem; +   uint32_t base, ofst; +   int s; + +   for (ld = b->entry; ld; ld = next) { +      next = ld->next; + +      if (is_cspace_load(ld)) { +         mem = ld->src[0]->value; +         rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)]; +      } else +      if (ld->opcode == NV_OP_VFETCH) { +         mem = ld->src[0]->value; +         rec = &ctx->mem_a; +      } else +      if (ld->opcode == NV_OP_EXPORT) { +         mem = ld->src[0]->value; +         if (mem->reg.file != NV_FILE_MEM_V) +            continue; +         rec = &ctx->mem_v; +      } else { +         continue; +      } +      if (ld->def[0] && ld->def[0]->refc == 0) +         continue; +      ofst = mem->reg.address; +      base = (ld->indirect >= 0) ? 
ld->src[ld->indirect]->value->n : 0; + +      for (it = *rec; it; it = it->next) { +         if (it->base == base && +             ((it->ofst >> 4) == (ofst >> 4)) && +             ((it->ofst + it->size == ofst) || +              (it->ofst - mem->reg.size == ofst))) { +            if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) +               continue; +            if (it->ofst < ofst) { +               if ((it->ofst & 0xf) == 4) +                  continue; +            } else +            if ((ofst & 0xf) == 4) +               continue; +            break; +         } +      } +      if (it) { +         switch (ld->opcode) { +         case NV_OP_EXPORT: combine_export(it, ld); break; +         default: +            combine_load(it, ld); +            break; +         } +      } else +      if (ctx->alloc < MEM_RECORD_POOL_SIZE) { +         add_mem_record(ctx, rec, base, ofst, ld); +      } +   } + +   DESCEND_ARBITRARY(s, nv_pass_mem_opt); +   return 0; +} + +static void +eliminate_store(struct mem_record *rec, struct nv_instruction *st) +{ +} + +/* elimination of redundant stores */ +static int +pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ +   struct mem_record **rec, *it; +   struct nv_instruction *st, *next; +   struct nv_value *mem; +   uint32_t base, ofst, size; +   int s; + +   for (st = b->entry; st; st = next) { +      next = st->next; + +      if (st->opcode == NV_OP_ST) { +         mem = st->src[0]->value; +         rec = &ctx->mem_l; +      } else +      if (st->opcode == NV_OP_EXPORT) { +         mem = st->src[0]->value; +         if (mem->reg.file != NV_FILE_MEM_V) +            continue; +         rec = &ctx->mem_v; +      } else +      if (st->opcode == NV_OP_ST) { +         /* TODO: purge */ +      } +      ofst = mem->reg.address; +      base = (st->indirect >= 0) ? 
st->src[st->indirect]->value->n : 0; +      size = mem->reg.size; + +      for (it = *rec; it; it = it->next) { +         if (it->base == base && +             (it->ofst <= ofst && (it->ofst + size) > ofst)) +            break; +      } +      if (it) +         eliminate_store(it, st); +      else +         add_mem_record(ctx, rec, base, ofst, st); +   } + +   DESCEND_ARBITRARY(s, nv_pass_mem_opt); +   return 0; +} + +/* TODO: properly handle loads from l[] memory in the presence of stores */ +static int +nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ +#if 0 +   struct load_record **rec, *it; +   struct nv_instruction *ld, *next; +   uint64_t data[2]; +   struct nv_value *val; +   int j; + +   for (ld = b->entry; ld; ld = next) { +      next = ld->next; +      if (!ld->src[0]) +         continue; +      val = ld->src[0]->value; +      rec = NULL; + +      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) { +         data[0] = val->reg.id; +         data[1] = 0; +         rec = &ctx->mem_v; +      } else +      if (ld->opcode == NV_OP_LDA) { +         data[0] = val->reg.id; +         data[1] = ld->src[4] ? 
ld->src[4]->value->n : ~0ULL; +         if (val->reg.file >= NV_FILE_MEM_C(0) && +             val->reg.file <= NV_FILE_MEM_C(15)) +            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)]; +         else +         if (val->reg.file == NV_FILE_MEM_S) +            rec = &ctx->mem_s; +         else +         if (val->reg.file == NV_FILE_MEM_L) +            rec = &ctx->mem_l; +      } else +      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) { +         data[0] = val->reg.imm.u32; +         data[1] = 0; +         rec = &ctx->imm; +      } + +      if (!rec || !ld->def[0]->refc) +         continue; + +      for (it = *rec; it; it = it->next) +         if (it->data[0] == data[0] && it->data[1] == data[1]) +            break; + +      if (it) { +         if (ld->def[0]->reg.id >= 0) +            it->value = ld->def[0]; +         else +         if (!ld->fixed) +            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value); +      } else { +         if (ctx->alloc == LOAD_RECORD_POOL_SIZE) +            continue; +         it = &ctx->pool[ctx->alloc++]; +         it->next = *rec; +         it->data[0] = data[0]; +         it->data[1] = data[1]; +         it->value = ld->def[0]; +         *rec = it; +      } +   } + +   ctx->imm = NULL; +   ctx->mem_s = NULL; +   ctx->mem_v = NULL; +   for (j = 0; j < 16; ++j) +      ctx->mem_c[j] = NULL; +   ctx->mem_l = NULL; +   ctx->alloc = 0; + +   DESCEND_ARBITRARY(j, nv_pass_reload_elim); +#endif +   return 0; +} + +static int +nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b) +{ +   int i, c, j; + +   for (i = 0; i < ctx->pc->num_instructions; ++i) { +      struct nv_instruction *nvi = &ctx->pc->instructions[i]; +      struct nv_value *def[4]; + +      if (!nv_is_texture_op(nvi->opcode)) +         continue; +      nvi->tex_mask = 0; + +      for (c = 0; c < 4; ++c) { +         if (nvi->def[c]->refc) +            nvi->tex_mask |= 1 << c; +         def[c] = nvi->def[c]; +      } + +      j = 
0; +      for (c = 0; c < 4; ++c) +         if (nvi->tex_mask & (1 << c)) +            nvi->def[j++] = def[c]; +      for (c = 0; c < 4; ++c) +         if (!(nvi->tex_mask & (1 << c))) +           nvi->def[j++] = def[c]; +      assert(j == 4); +   } +   return 0; +} + +struct nv_pass_dce { +   struct nv_pc *pc; +   uint removed; +}; + +static int +nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) +{ +   int j; +   struct nv_instruction *nvi, *next; + +   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) { +      next = nvi->next; + +      if (inst_removable(nvi)) { +         nvc0_insn_delete(nvi); +         ++ctx->removed; +      } +   } +   DESCEND_ARBITRARY(j, nv_pass_dce); + +   return 0; +} + +#if 0 +/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. + * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with + * BREAK and dummy ELSE block. + */ +static INLINE boolean +bb_is_if_else_endif(struct nv_basic_block *bb) +{ +   if (!bb->out[0] || !bb->out[1]) +      return FALSE; + +   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) { +      return (bb->out[0]->out[1] == bb->out[1]->out[0] && +              !bb->out[1]->out[1]); +   } else { +      return (bb->out[0]->out[0] == bb->out[1]->out[0] && +              !bb->out[0]->out[1] && +              !bb->out[1]->out[1]); +   } +} + +/* predicate instructions and remove branch at the end */ +static void +predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, +                       struct nv_value *p, ubyte cc) +{ + +} +#endif + +/* NOTE: Run this after register allocation, we can just cut out the cflow + * instructions and hook the predicates to the conditional OPs if they are + * not using immediates; better than inserting SELECT to join definitions. + * + * NOTE: Should adapt prior optimization to make this possible more often. 
+ */ +static int +nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) +{ +   return 0; +} + +/* local common subexpression elimination, stupid O(n^2) implementation */ +static int +nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) +{ +#if 0 +   struct nv_instruction *ir, *ik, *next; +   struct nv_instruction *entry = b->phi ? b->phi : b->entry; +   int s; +   unsigned int reps; + +   do { +      reps = 0; +      for (ir = entry; ir; ir = next) { +         next = ir->next; +         for (ik = entry; ik != ir; ik = ik->next) { +            if (ir->opcode != ik->opcode || ir->fixed) +               continue; + +            if (!ir->def[0] || !ik->def[0] || +                ik->opcode == NV_OP_LDA || +                ik->opcode == NV_OP_STA || +                ik->opcode == NV_OP_MOV || +                nv_is_vector_op(ik->opcode)) +               continue; /* ignore loads, stores & moves */ + +            if (ik->src[4] || ir->src[4]) +               continue; /* don't mess with address registers */ + +            if (ik->flags_src || ir->flags_src || +                ik->flags_def || ir->flags_def) +               continue; /* and also not with flags, for now */ + +            if (ik->def[0]->reg.file == NV_FILE_OUT || +                ir->def[0]->reg.file == NV_FILE_OUT || +                !values_equal(ik->def[0], ir->def[0])) +               continue; + +            for (s = 0; s < 3; ++s) { +               struct nv_value *a, *b; + +               if (!ik->src[s]) { +                  if (ir->src[s]) +                     break; +                  continue; +               } +               if (ik->src[s]->mod != ir->src[s]->mod) +                  break; +               a = ik->src[s]->value; +               b = ir->src[s]->value; +               if (a == b) +                  continue; +               if (a->reg.file != b->reg.file || +                   a->reg.id < 0 || +                   a->reg.id != b->reg.id) +                  break; + 
           } +            if (s == 3) { +               nvc0_insn_delete(ir); +               ++reps; +               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]); +               break; +            } +         } +      } +   } while(reps); + +   DESCEND_ARBITRARY(s, nv_pass_cse); +#endif +   return 0; +} + +static int +nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) +{ +   struct pass_reld_elim *reldelim; +   struct nv_pass pass; +   struct nv_pass_dce dce; +   int ret; + +   pass.n = 0; +   pass.pc = pc; + +   /* Do this first, so we don't have to pay attention +    * to whether sources are supported memory loads. +    */ +   pc->pass_seq++; +   ret = nv_pass_lower_arith(&pass, root); +   if (ret) +      return ret; + +   pc->pass_seq++; +   ret = nv_pass_lower_mods(&pass, root); +   if (ret) +      return ret; + +   pc->pass_seq++; +   ret = nvc0_pass_fold_loads(&pass, root); +   if (ret) +      return ret; + +   if (pc->opt_reload_elim) { +      reldelim = CALLOC_STRUCT(pass_reld_elim); +      reldelim->pc = pc; + +      pc->pass_seq++; +      ret = nv_pass_reload_elim(reldelim, root); +      if (ret) { +         FREE(reldelim); +         return ret; +      } +      memset(reldelim, 0, sizeof(struct pass_reld_elim)); +      reldelim->pc = pc; +   } + +   pc->pass_seq++; +   ret = nv_pass_cse(&pass, root); +   if (ret) +      return ret; + +   dce.pc = pc; +   do { +      dce.removed = 0; +      pc->pass_seq++; +      ret = nv_pass_dce(&dce, root); +      if (ret) +         return ret; +   } while (dce.removed); + +   if (pc->opt_reload_elim) { +      pc->pass_seq++; +      ret = nv_pass_mem_opt(reldelim, root); +      if (!ret) { +         memset(reldelim, 0, sizeof(struct pass_reld_elim)); +         reldelim->pc = pc; + +         pc->pass_seq++; +         ret = nv_pass_mem_opt(reldelim, root); +      } +      FREE(reldelim); +      if (ret) +         return ret; +   } + +   ret = nv_pass_tex_mask(&pass, root); +   if (ret) +      return ret; 
+ +   return ret; +} + +int +nvc0_pc_exec_pass0(struct nv_pc *pc) +{ +   int i, ret; + +   for (i = 0; i < pc->num_subroutines + 1; ++i) +      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) +         return ret; +   return 0; +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c new file mode 100644 index 0000000000..9eac5ad900 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -0,0 +1,375 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_pc.h" + +#define PRINT(args...) 
debug_printf(args) + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#endif + +static const char *norm = "\x1b[00m"; +static const char *gree = "\x1b[32m"; +static const char *blue = "\x1b[34m"; +static const char *cyan = "\x1b[36m"; +static const char *yllw = "\x1b[33m"; +static const char *mgta = "\x1b[35m"; + +static const char *nv_cond_names[] = +{ +   "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "", +   "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "", +   "o", "c", "a", "s" +}; + +static const char *nv_modifier_strings[] = +{ +   "", +   "neg", +   "abs", +   "neg abs", +   "not", +   "not neg" +   "not abs", +   "not neg abs", +   "sat", +   "BAD_MOD" +}; + +const char * +nvc0_opcode_name(uint opcode) +{ +   return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name; +} + +static INLINE const char * +nv_type_name(ubyte type, ubyte size) +{ +   switch (type) { +   case NV_TYPE_U16: return "u16"; +   case NV_TYPE_S16: return "s16"; +   case NV_TYPE_F32: return "f32"; +   case NV_TYPE_U32: return "u32"; +   case NV_TYPE_S32: return "s32"; +   case NV_TYPE_P32: return "p32"; +   case NV_TYPE_F64: return "f64"; +   case NV_TYPE_ANY: +   { +      switch (size) { +      case 1: return "b8"; +      case 2: return "b16"; +      case 4: return "b32"; +      case 8: return "b64"; +      case 12: return "b96"; +      case 16: return "b128"; +      default: +         return "BAD_SIZE"; +      } +   } +   default: +      return "BAD_TYPE"; +   } +} + +static INLINE const char * +nv_cond_name(ubyte cc) +{ +   return nv_cond_names[MIN2(cc, 19)]; +} + +static INLINE const char * +nv_modifier_string(ubyte mod) +{ +   return nv_modifier_strings[MIN2(mod, 9)]; +} + +static INLINE int +nv_value_id(struct nv_value *value) +{ +   if (value->join->reg.id >= 0) +      return value->join->reg.id; +   return value->n; +} + +static INLINE boolean +nv_value_allocated(struct nv_value *value) +{ +   return (value->reg.id >= 0) ? 
TRUE : FALSE; +} + +static INLINE void +nv_print_address(const char c, int buf, struct nv_value *a, int offset) +{ +   const char ac = (a && nv_value_allocated(a)) ? '$' : '%'; +   char sg; + +   if (offset < 0) { +      sg = '-'; +      offset = -offset; +   } else { +      sg = '+'; +   } + +   if (buf >= 0) +      PRINT(" %s%c%i[", cyan, c, buf); +   else +      PRINT(" %s%c[", cyan, c); +   if (a) +      PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg); +   PRINT("%s0x%x%s]", yllw, offset, cyan); +} + +static INLINE void +nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type) +{ +   char reg_pfx = nv_value_allocated(value->join) ? '$' : '%'; + +   if (value->reg.file != NV_FILE_PRED) +      PRINT(" %s%s", gree, nv_type_name(type, value->reg.size)); + +   switch (value->reg.file) { +   case NV_FILE_GPR: +      PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value)); +      if (value->reg.size == 8) +         PRINT("d"); +      if (value->reg.size == 16) +         PRINT("q"); +      break; +   case NV_FILE_PRED: +      PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value)); +      break; +   case NV_FILE_COND: +      PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value)); +      break; +   case NV_FILE_MEM_L: +      nv_print_address('l', -1, indir, value->reg.address); +      break; +   case NV_FILE_MEM_G: +      nv_print_address('g', -1, indir, value->reg.address); +      break; +   case NV_FILE_MEM_A: +      nv_print_address('a', -1, indir, value->reg.address); +      break; +   case NV_FILE_MEM_V: +      nv_print_address('v', -1, indir, value->reg.address); +      break; +   case NV_FILE_IMM: +      switch (type) { +      case NV_TYPE_U16: +      case NV_TYPE_S16: +         PRINT(" %s0x%04x", yllw, value->reg.imm.u32); +         break; +      case NV_TYPE_F32: +         PRINT(" %s%f", yllw, value->reg.imm.f32); +         break; +      case NV_TYPE_F64: +         PRINT(" %s%f", yllw, value->reg.imm.f64); +         break; +      case 
NV_TYPE_U32:
+      case NV_TYPE_S32:
+      case NV_TYPE_P32:
+      case NV_TYPE_ANY:
+         PRINT(" %s0x%08x", yllw, value->reg.imm.u32);
+         break;
+      }
+      break;
+   default:
+      if (value->reg.file >= NV_FILE_MEM_C(0) &&
+          value->reg.file <= NV_FILE_MEM_C(15))
+         nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir,
+                          value->reg.address);
+      else
+         NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
+      break;
+   }
+}
+
+/* Print the value behind a reference; thin wrapper around nv_print_value(). */
+static INLINE void
+nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type)
+{
+   nv_print_value(ref->value, indir, type);
+}
+
+/* Dump one instruction as a single line of text:
+ *   serial, optional predicate, opcode name (with "set <cond>" / "sat"
+ *   prefixes), destination(s) or branch target, then the sources.
+ * The predicate and indirect-address sources are not listed among the
+ * ordinary sources; the indirect source is instead passed along when a
+ * memory-file operand is printed.
+ */
+void
+nvc0_print_instruction(struct nv_instruction *i)
+{
+   int s;
+
+   PRINT("%i: ", i->serial);
+
+   if (i->predicate >= 0) {
+      /* "fl" when i->cc is set, "tr" otherwise, followed by the predicate */
+      PRINT("%s%s", gree, i->cc ? "fl" : "tr");
+      nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8);
+      PRINT(" ");
+   }
+
+   PRINT("%s", gree);
+   if (NV_BASEOP(i->opcode) == NV_OP_SET)
+      PRINT("set %s", nv_cond_name(i->set_cond));
+   else
+   if (i->saturate)
+      PRINT("sat %s", nvc0_opcode_name(i->opcode));
+   else
+      PRINT("%s", nvc0_opcode_name(i->opcode));
+
+   /* first destination: CVT carries its own destination type */
+   if (i->opcode == NV_OP_CVT)
+      nv_print_value(i->def[0], NULL, i->ext.cvt.d);
+   else
+   if (i->def[0])
+      nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode));
+   else
+   if (i->target)
+      PRINT(" %s(BB:%i)", yllw, i->target->id);
+   else
+      PRINT(" #");
+
+   /* additional destinations, if any */
+   for (s = 1; s < 4 && i->def[s]; ++s)
+      nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode));
+   if (s > 1)
+      PRINT("%s ,", norm);
+
+   for (s = 0; s < 6 && i->src[s]; ++s) {
+      ubyte type;
+      /* already printed (predicate) or printed as part of an address */
+      if (s == i->indirect || s == i->predicate)
+         continue;
+      if (i->opcode == NV_OP_CVT)
+         type = i->ext.cvt.s;
+      else
+         type = NV_OPTYPE(i->opcode);
+
+      if (i->src[s]->mod)
+         PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod));
+
+      if (i->indirect >= 0 &&
+          NV_IS_MEMORY_FILE(i->src[s]->value->reg.file))
+         nv_print_ref(i->src[s], i->src[i->indirect]->value, type);
+      else
+         nv_print_ref(i->src[s], NULL, type);
+   }
+   PRINT(" %s\n", norm);
+}
+
+/* Only used as a complete initializer element in the table below. */
+#define NV_MOD_SGN NV_MOD_ABS | NV_MOD_NEG
+
+/* Per-opcode information, one row per opcode plus a trailing "BAD_OP"
+ * sentinel.  The leading columns are: base operation, printable name,
+ * operand type and permitted source modifiers; the remaining columns are
+ * the flag fields of struct nv_op_info (declared outside this file section).
+ * Rows whose first column differs from their own opcode describe typed
+ * variants of that base operation.
+ */
+struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
+{
+   { NV_OP_UNDEF,  "undef",  NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+   { NV_OP_BIND,   "bind",   NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
+   { NV_OP_MERGE,  "merge",  NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
+   { NV_OP_PHI,    "phi",    NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+   { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+   { NV_OP_NOP,    "nop",    NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
+
+   { NV_OP_LD,  "ld",  NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_ST,  "st",  NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
+   { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_OR,  "or",  NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
+   { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
+   { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 },
+   { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 1, 2 },
+   { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 },
+   { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
+   { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
+   { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
+   { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_CEIL,  "ceil",  NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+   /* was printed as "floor", which made truncs indistinguishable in dumps */
+   { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+
+   { NV_OP_VFETCH,  "vfetch",  NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_PFETCH,  "pfetch",  NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_EXPORT,  "export",  NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_EMIT,    "emit",    NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_TEX, "tex",      NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_TXB, "texbias",  NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_TXL, "texlod",   NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
+
+   { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_DFDX,   "dfdx",   NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_DFDY,   "dfdy",   NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_KIL,  "kil",  NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_BRA,  "bra",  NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_RET,  "ret",  NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_RET,  "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_NOP,  "ud",   NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_NOP,  "ud",   NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_JOIN,   "join",   NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   /* integer max/min: the base op must agree with the printed name
+    * (the u32 "max" row used to claim NV_OP_MIN and the s32 "min" row
+    * NV_OP_MAX)
+    */
+   { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+   { NV_OP_MAX, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
+   { NV_OP_MIN, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+   { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
+   { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+   { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
+   { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SET_F32_OR,  "or set",  NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   /* NOTE(review): base NV_OP_ADD with name "sub" and type F32 looks
+    * inconsistent; left as is because the intended opcode for this slot
+    * is not visible here - confirm against the opcode list.
+    */
+   { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
+
+   { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+
+   { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
new file mode 100644
index 0000000000..6f9d5de197
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
@@ -0,0 +1,925 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define NOUVEAU_DEBUG 1
+
+/* #define NVC0_RA_DEBUG_LIVEI */
+/* #define NVC0_RA_DEBUG_LIVE_SETS */
+/* #define NVC0_RA_DEBUG_JOIN */
+
+#include "nvc0_pc.h"
+#include "util/u_simple_list.h"
+
+#define NVC0_NUM_REGISTER_FILES 3
+
+/* Occupancy state of the allocatable register files.
+ * @bits: one bit per allocation unit, set while the unit is occupied
+ * @last: highest unit index that may be handed out in each file
+ * @log2_unit: log2 of min allocation unit for register
+ * @pc: program whose max_reg[] is updated as registers are taken
+ */
+struct register_set {
+   uint32_t bits[NVC0_NUM_REGISTER_FILES][2];
+   uint32_t last[NVC0_NUM_REGISTER_FILES];
+   int log2_unit[NVC0_NUM_REGISTER_FILES];
+   struct nv_pc *pc;
+};
+
+/* Context shared by the register allocation passes: the program and its
+ * instructions in serial order (filled by pass_order_instructions).
+ */
+struct nv_pc_pass {
+   struct nv_pc *pc;
+   struct nv_instruction **insns;
+   uint num_insns;
+   uint pass_seq;
+};
+
+/* Merge @range with every following range it reaches, freeing the absorbed
+ * list nodes.
+ */
+static void
+ranges_coalesce(struct nv_range *range)
+{
+   while (range->next && range->end >= range->next->bgn) {
+      struct nv_range *rnn = range->next->next;
+      assert(range->bgn <= range->next->bgn);
+      range->end = MAX2(range->end, range->next->end);
+      FREE(range->next);
+      range->next = rnn;
+   }
+}
+
+/* Add [bgn, end) to the sorted live interval list of @val.
+ * Returns TRUE if the interval was merged into an existing range (a passed
+ * in @new_range is then unused and still owned by the caller), FALSE if a
+ * node was linked into the list (@new_range, or a fresh allocation when
+ * @new_range is NULL).
+ */
+static boolean
+add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
+{
+   struct nv_range *range, **nextp = &val->livei;
+
+   for (range = val->livei; range; range = range->next) {
+      if (end < range->bgn)
+         break; /* insert before */
+
+      if (bgn > range->end) {
+         nextp = &range->next;
+         continue; /* insert after */
+      }
+
+      /* overlap */
+      if (bgn < range->bgn) {
+         range->bgn = bgn;
+         if (end > range->end)
+            range->end = end;
+         ranges_coalesce(range);
+         return TRUE;
+      }
+      if (end > range->end) {
+         range->end = end;
+         ranges_coalesce(range);
+         return TRUE;
+      }
+      assert(bgn >= range->bgn);
+      assert(end <= range->end);
+      return TRUE;
+   }
+
+   if (!new_range)
+      new_range = CALLOC_STRUCT(nv_range);
+
+   new_range->bgn = bgn;
+   new_range->end = end;
+   new_range->next = range;
+   *(nextp) = new_range;
+   return FALSE;
+}
+
+/* Record that @val is live in block @b up to serial @end.  The range begins
+ * at the defining instruction if that lies inside @b, else at the block
+ * entry.
+ */
+static void
+add_range(struct nv_value *val, struct nv_basic_block *b, int end)
+{
+   int bgn;
+
+   if (!val->insn) /* ignore non-def values */
+      return;
+   assert(b->entry->serial <= b->exit->serial);
+   assert(b->phi->serial <= end);
+   assert(b->exit->serial + 1 >= end);
+
+   bgn = val->insn->serial;
+   if (bgn < b->entry->serial || bgn > b->exit->serial)
+      bgn = b->entry->serial;
+
+   assert(bgn <= end);
+
+   add_range_ex(val, bgn, end, NULL);
+}
+
+#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
+/* Debug helper: print all live ranges of @a. */
+static void
+livei_print(struct nv_value *a)
+{
+   struct nv_range *r = a->livei;
+
+   debug_printf("livei %i: ", a->n);
+   while (r) {
+      debug_printf("[%i, %i) ", r->bgn, r->end);
+      r = r->next;
+   }
+   debug_printf("\n");
+}
+#endif
+
+/* Move all live ranges of @src into @dst; @src ends up without ranges. */
+static void
+livei_unify(struct nv_value *dst, struct nv_value *src)
+{
+   struct nv_range *range, *next;
+
+   for (range = src->livei; range; range = next) {
+      next = range->next;
+      /* TRUE means the node was merged rather than linked: drop it */
+      if (add_range_ex(dst, range->bgn, range->end, range))
+         FREE(range);
+   }
+   src->livei = NULL;
+}
+
+/* Free the live ranges of @val and leave it with an empty list. */
+static void
+livei_release(struct nv_value *val)
+{
+   struct nv_range *range, *next;
+
+   for (range = val->livei; range; range = next) {
+      next = range->next;
+      FREE(range);
+   }
+   /* The allocator runs once per subroutine but walks all values of the
+    * program each time; a stale head pointer would be read again and freed
+    * a second time on the next run.
+    */
+   val->livei = NULL;
+}
+
+/* TRUE if any live range of @a intersects any live range of @b. */
+static boolean
+livei_have_overlap(struct nv_value *a, struct nv_value *b)
+{
+   struct nv_range *r_a, *r_b;
+
+   for (r_a = a->livei; r_a; r_a = r_a->next) {
+      for (r_b = b->livei; r_b; r_b = r_b->next) {
+         if (r_b->bgn < r_a->end &&
+             r_b->end > r_a->bgn)
+            return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+/* End of the last live range of @a; @a must have at least one range. */
+static int
+livei_end(struct nv_value *a)
+{
+   struct nv_range *r = a->livei;
+
+   assert(r);
+   while (r->next)
+      r = r->next;
+   return r->end;
+}
+
+/* TRUE if @pos lies inside one of the live ranges of @a. */
+static boolean
+livei_contains(struct nv_value *a, int pos)
+{
+   struct nv_range *r;
+
+   for (r = a->livei; r && r->bgn <= pos; r = r->next)
+      if (r->end > pos)
+         return TRUE;
+   return FALSE;
+}
+
+/* Find a free, aligned run of units for the @n values in @def (3 is rounded
+ * up to 4), mark it occupied in @set and hand out consecutive ids to those
+ * values that have a live interval.  Returns FALSE if no run was found.
+ */
+static boolean
+reg_assign(struct register_set *set, struct nv_value **def, int n)
+{
+   int i, id, s, k;
+   uint32_t m;
+   int f = def[0]->reg.file;
+
+   k = n;
+   if (k == 3)
+      k = 4;
+   s = (k * def[0]->reg.size) >> set->log2_unit[f]; /* units needed */
+   m = (1 << s) - 1;                                /* mask of s units */
+
+   id = set->last[f];
+
+   for (i = 0; i * 32 < set->last[f]; ++i) {
+      if (set->bits[f][i] == 0xffffffff)
+         continue;
+
+      for (id = 0; id < 32; id += s)
+         if (!(set->bits[f][i] & (m << id)))
+            break;
+      if (id < 32)
+         break;
+   }
+   if (i * 32 + id > set->last[f])
+      return FALSE;
+
+   set->bits[f][i] |= m << id;
+
+   id += i * 32;
+
+   set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
+
+   for (i = 0; i < n; ++i)
+      if (def[i]->livei)
+         def[i]->reg.id = id++;
+
+   return TRUE;
+}
+
+/* Mark the units of the already assigned value @val as occupied. */
+static INLINE void
+reg_occupy(struct register_set *set, struct nv_value *val)
+{
+   int id = val->reg.id, f = val->reg.file;
+   uint32_t m;
+
+   if (id < 0)
+      return;
+   m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
+
+   set->bits[f][id / 32] |= m << (id % 32);
+
+   if (set->pc->max_reg[f] < id)
+      set->pc->max_reg[f] = id;
+}
+
+/* Mark the units of the already assigned value @val as free again. */
+static INLINE void
+reg_release(struct register_set *set, struct nv_value *val)
+{
+   int id = val->reg.id, f = val->reg.file;
+   uint32_t m;
+
+   if (id < 0)
+      return;
+   m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
+
+   set->bits[f][id / 32] &= ~(m << (id % 32));
+}
+
+/* Check whether @a and @b may share a register: same file and size, not
+ * both already assigned to different ids, not id 63, and no other value
+ * with the assigned id overlapping the unassigned one.
+ */
+static INLINE boolean
+join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   int i;
+   struct nv_value *val;
+
+   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
+      return FALSE;
+
+   if (a->join->reg.id == b->join->reg.id)
+      return TRUE;
+
+   /* either a or b or both have been assigned */
+
+   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
+      return FALSE;
+   else
+   if (b->join->reg.id >= 0) {
+      if (b->join->reg.id == 63)
+         return FALSE;
+      /* swap so that a is the assigned one */
+      val = a;
+      a = b;
+      b = val;
+   } else
+   if (a->join->reg.id == 63)
+      return FALSE;
+
+   for (i = 0; i < ctx->pc->num_values; ++i) {
+      val = &ctx->pc->values[i];
+
+      if (val->join->reg.id != a->join->reg.id)
+         continue;
+      if (val->join != a->join && livei_have_overlap(val->join, b->join))
+         return FALSE;
+   }
+   return TRUE;
+}
+
+/* Unconditionally join @b into @a: a->join inherits an assigned id and all
+ * live ranges of b->join, and every value that was represented by b->join
+ * is redirected to a->join.
+ */
+static INLINE void
+do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   int j;
+   struct nv_value *bjoin = b->join;
+
+   if (b->join->reg.id >= 0)
+      a->join->reg.id = b->join->reg.id;
+
+   livei_unify(a->join, b->join);
+
+#ifdef NVC0_RA_DEBUG_JOIN
+   debug_printf("joining %i to %i\n", b->n, a->n);
+#endif
+
+   /* make a->join the new representative */
+   for (j = 0; j < ctx->pc->num_values; ++j)
+      if (ctx->pc->values[j].join == bjoin)
+         ctx->pc->values[j].join = a->join;
+
+   assert(b->join == a->join);
+}
+
+/* Join @b into @a if that is allowed and their live intervals are disjoint. */
+static INLINE void
+try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   if (!join_allowed(ctx, a, b)) {
+#ifdef NVC0_RA_DEBUG_JOIN
+      debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
+#endif
+      return;
+   }
+   if (livei_have_overlap(a->join, b->join)) {
+#ifdef NVC0_RA_DEBUG_JOIN
+      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
+      livei_print(a);
+      livei_print(b);
+#endif
+      return;
+   }
+
+   do_join_values(ctx, a, b);
+}
+
+/* TRUE if the edge p -> b needs its own block for the phi MOVs: b has more
+ * than one predecessor and p has two non-loop successors.
+ */
+static INLINE boolean
+need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
+{
+   int i = 0, n = 0;
+
+   for (; i < 2; ++i)
+      if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
+         ++n;
+
+   return (b->num_in > 1) && (n == 2);
+}
+
+/* Select the source index of @phi that belongs to predecessor @b of @tb,
+ * or -1 if none of the sources is reachable.
+ */
+static int
+phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
+                struct nv_basic_block *tb)
+{
+   int i, j;
+
+   for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
+      if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb))
+         continue;
+      /* NOTE: back-edges are ignored by the reachable-by check */
+      if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb,
+                                             phi->src[i]->value->insn->bb, tb))
+         j = i;
+   }
+   return j;
+}
+
+/* For each operand of each PHI in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with its
+ * result. This eliminates liveness conflicts and enables us to let values be
+ * copied to the right register if such a conflict exists nonetheless.
+ *
+ * These MOVs are also crucial in making sure the live intervals of phi
+ * sources are extended until the end of the loop, since they are not
+ * included in the live-in sets.
+ */
+static int
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i, *ni;
+   struct nv_value *val;
+   struct nv_basic_block *p, *pn;
+   int n, j;
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   for (n = 0; n < b->num_in; ++n) {
+      p = pn = b->in[n];
+      assert(p);
+
+      if (need_new_else_block(b, p)) {
+         /* splice a new block pn into the edge p -> b */
+         pn = new_basic_block(ctx->pc);
+
+         if (p->out[0] == b)
+            p->out[0] = pn;
+         else
+            p->out[1] = pn;
+
+         if (p->exit->target == b) /* target to new else-block */
+            p->exit->target = pn;
+
+         b->in[n] = pn;
+
+         pn->out[0] = b;
+         pn->in[0] = p;
+         pn->num_in = 1;
+      }
+      ctx->pc->current_block = pn;
+
+      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+         if ((j = phi_opnd_for_bb(i, p, b)) < 0)
+            continue;
+         val = i->src[j]->value;
+
+         if (i->src[j]->flags) {
+            /* value already encountered from a different in-block */
+            val = val->insn->src[0]->value;
+            while (j < 6 && i->src[j])
+               ++j;
+            assert(j < 6);
+         }
+
+         ni = new_instruction(ctx->pc, NV_OP_MOV);
+
+         /* TODO: insert instruction at correct position in the first place */
+         if (ni->prev && ni->prev->target)
+            nvc0_insns_permute(ni->prev, ni);
+
+         ni->def[0] = new_value_like(ctx->pc, val);
+         ni->def[0]->insn = ni;
+         nv_reference(ctx->pc, ni, 0, val);
+         nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
+         i->src[j]->flags = 1;
+      }
+
+      if (pn != p && pn->exit) {
+         ctx->pc->current_block = b->in[n ? 0 : 1];
+         ni = new_instruction(ctx->pc, NV_OP_BRA);
+         ni->target = b;
+         ni->terminator = 1;
+      }
+   }
+
+   for (j = 0; j < 2; ++j)
+      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
+         pass_generate_phi_movs(ctx, b->out[j]);
+
+   return 0;
+}
+
+/* Join values that should or must share a register.  @iter selects which
+ * instructions are considered: 0 handles texture ops and BIND, 1 handles
+ * SELECT, 2 handles PHI and MOV.
+ */
+static int
+pass_join_values(struct nv_pc_pass *ctx, int iter)
+{
+   int c, n;
+
+   for (n = 0; n < ctx->num_insns; ++n) {
+      struct nv_instruction *i = ctx->insns[n];
+
+      switch (i->opcode) {
+      case NV_OP_PHI:
+         if (iter != 2)
+            break;
+         for (c = 0; c < 6 && i->src[c]; ++c)
+            try_join_values(ctx, i->def[0], i->src[c]->value);
+         break;
+      case NV_OP_MOV:
+         if ((iter == 2) && i->src[0]->value->insn &&
+             !nv_is_texture_op(i->src[0]->value->join->insn->opcode))
+            try_join_values(ctx, i->def[0], i->src[0]->value);
+         break;
+      case NV_OP_SELECT:
+         if (iter != 1)
+            break;
+         for (c = 0; c < 6 && i->src[c]; ++c) {
+            assert(join_allowed(ctx, i->def[0], i->src[c]->value));
+            do_join_values(ctx, i->def[0], i->src[c]->value);
+         }
+         break;
+      case NV_OP_TEX:
+      case NV_OP_TXB:
+      case NV_OP_TXL:
+      case NV_OP_TXQ:
+      case NV_OP_BIND:
+         if (iter)
+            break;
+         /* NOTE(review): assumes a def exists for every source index */
+         for (c = 0; c < 6 && i->src[c]; ++c)
+            do_join_values(ctx, i->def[c], i->src[c]->value);
+         break;
+      default:
+         break;
+      }
+   }
+   return 0;
+}
+
+/* Order the instructions so that live intervals can be expressed in numbers. */
+static void
+pass_order_instructions(void *priv, struct nv_basic_block *b)
+{
+   struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
+   struct nv_instruction *i;
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   assert(!b->exit || !b->exit->next);
+   for (i = b->phi; i; i = i->next) {
+      i->serial = ctx->num_insns;
+      ctx->insns[ctx->num_insns++] = i;
+   }
+}
+
+/* Debug helper: list the def'd values in the live set of @b (no-op unless
+ * NVC0_RA_DEBUG_LIVE_SETS is defined).
+ */
+static void
+bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
+{
+#ifdef NVC0_RA_DEBUG_LIVE_SETS
+   struct nv_value *val;
+   int j;
+
+   debug_printf("LIVE-INs of BB:%i: ", b->id);
+
+   for (j = 0; j < pc->num_values; ++j) {
+      if (!(b->live_set[j / 32] & (1u << (j % 32))))
+         continue;
+      val = &pc->values[j];
+      if (!val->insn)
+         continue;
+      debug_printf("%i ", val->n);
+   }
+   debug_printf("\n");
+#endif
+}
+
+/* Live-set bit helpers.  The mask is built from an unsigned 1 so that
+ * bit 31 is set without shifting into the sign bit of an int.
+ */
+static INLINE void
+live_set_add(struct nv_basic_block *b, struct nv_value *val)
+{
+   if (!val->insn) /* don't add non-def values */
+      return;
+   b->live_set[val->n / 32] |= 1u << (val->n % 32);
+}
+
+static INLINE void
+live_set_rem(struct nv_basic_block *b, struct nv_value *val)
+{
+   b->live_set[val->n / 32] &= ~(1u << (val->n % 32));
+}
+
+static INLINE boolean
+live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
+{
+   int n = ref->value->n;
+   /* normalize to 0/1: the raw mask would be cut off if boolean is
+    * narrower than 32 bits
+    */
+   return (b->live_set[n / 32] & (1u << (n % 32))) != 0;
+}
+
+/* The live set of a block contains those values that are live immediately
+ * before the beginning of the block, so do a backwards scan.
+ */
+static int
+pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *insn, *stop;
+   int w, k, succ;
+
+   /* a block is processed at most once per pass sequence number */
+   if (b->pass_seq >= ctx->pc->pass_seq)
+      return 0;
+   b->pass_seq = ctx->pc->pass_seq;
+
+   /* slight hack for undecidedness: set phi = entry if it's undefined */
+   if (!b->phi)
+      b->phi = b->entry;
+
+   /* start from the live sets of the successors (self-edges are skipped):
+    * the first successor's set is copied, the second one is OR'd in
+    */
+   for (succ = 0; succ < 2; ++succ) {
+      struct nv_basic_block *out = b->out[succ];
+      int err;
+
+      if (!out || out == b)
+         continue;
+
+      err = pass_build_live_sets(ctx, out);
+      if (err)
+         return err;
+
+      for (w = 0; w < (ctx->pc->num_values + 31) / 32; ++w) {
+         if (succ == 0)
+            b->live_set[w] = out->live_set[w];
+         else
+            b->live_set[w] |= out->live_set[w];
+      }
+   }
+
+   if (!b->entry)
+      return 0;
+
+   bb_live_set_print(ctx->pc, b);
+
+   /* walk from exit to entry: defs stop being live, sources become live */
+   stop = b->entry->prev;
+   insn = b->exit;
+   while (insn != stop) {
+      for (k = 0; k < 5 && insn->def[k]; ++k)
+         live_set_rem(b, insn->def[k]);
+      for (k = 0; k < 6 && insn->src[k]; ++k)
+         live_set_add(b, insn->src[k]->value);
+      insn = insn->prev;
+   }
+
+   /* values defined by the leading phi instructions are not live-in */
+   insn = b->phi;
+   while (insn && insn->opcode == NV_OP_PHI) {
+      live_set_rem(b, insn->def[0]);
+      insn = insn->next;
+   }
+
+   bb_live_set_print(ctx->pc, b);
+
+   return 0;
+}
+
+/* Initialize the live set of @b (@n 32-bit words) from its successors:
+ * the union of both, a copy of the only one, or empty if there is none.
+ */
+static void collect_live_values(struct nv_basic_block *b, const int n)
+{
+   struct nv_basic_block *first = b->out[0];
+   struct nv_basic_block *second = b->out[1];
+   int w;
+
+   if (first && second) { /* what to do about back-edges ? */
+      for (w = 0; w < n; ++w)
+         b->live_set[w] = first->live_set[w] | second->live_set[w];
+      return;
+   }
+
+   if (first)
+      memcpy(b->live_set, first->live_set, n * sizeof(uint32_t));
+   else if (second)
+      memcpy(b->live_set, second->live_set, n * sizeof(uint32_t));
+   else
+      memset(b->live_set, 0, n * sizeof(uint32_t));
+}
+
+/* NOTE: the live intervals of phi functions start at the first non-phi insn. 
*/
+static int
+pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i, *i_stop;
+   int j, s;
+   const int n = (ctx->pc->num_values + 31) / 32;
+
+   /* verify that first block does not have live-in values */
+   if (b->num_in == 0)
+      for (j = 0; j < n; ++j)
+         assert(b->live_set[j] == 0);
+
+   collect_live_values(b, n);
+
+   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
+   for (j = 0; j < 2; ++j) {
+      if (!b->out[j] || !b->out[j]->phi)
+         continue;
+      /* stop at the end of the list too: a block may hold nothing but phis */
+      for (i = b->out[j]->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+         live_set_rem(b, i->def[0]);
+
+         for (s = 0; s < 6 && i->src[s]; ++s) {
+            assert(i->src[s]->value->insn);
+            if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb,
+                                         b->out[j]))
+               live_set_add(b, i->src[s]->value);
+            else
+               live_set_rem(b, i->src[s]->value);
+         }
+      }
+   }
+
+   /* remaining live-outs are live until the end */
+   if (b->exit) {
+      for (j = 0; j < ctx->pc->num_values; ++j) {
+         if (!(b->live_set[j / 32] & (1u << (j % 32))))
+            continue;
+         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
+#ifdef NVC0_RA_DEBUG_LIVEI
+         debug_printf("adding range for live value %i: ", j);
+         livei_print(&ctx->pc->values[j]);
+#endif
+      }
+   }
+
+   i_stop = b->entry ? b->entry->prev : NULL;
+
+   /* don't have to include phi functions here (will have 0 live range) */
+   for (i = b->exit; i != i_stop; i = i->prev) {
+      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
+      /* same def bound as the loops in pass_build_live_sets and
+       * pass_linear_scan, so that a fifth def also leaves the live set
+       */
+      for (j = 0; j < 5 && i->def[j]; ++j)
+         live_set_rem(b, i->def[j]);
+
+      for (j = 0; j < 6 && i->src[j]; ++j) {
+         /* first use seen in the backwards walk = last use in the block */
+         if (!live_set_test(b, i->src[j])) {
+            live_set_add(b, i->src[j]->value);
+            add_range(i->src[j]->value, b, i->serial);
+#ifdef NVC0_RA_DEBUG_LIVEI
+            debug_printf("adding range for source %i (ends living): ",
+                         i->src[j]->value->n);
+            livei_print(i->src[j]->value);
+#endif
+         }
+      }
+   }
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
+      pass_build_intervals(ctx, b->out[0]);
+
+   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
+      pass_build_intervals(ctx, b->out[1]);
+
+   return 0;
+}
+
+/* Reset @set to "everything free" and fill in the per-file limits and
+ * allocation unit sizes.
+ */
+static INLINE void
+nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
+{
+   memset(set, 0, sizeof(*set));
+
+   set->last[NV_FILE_GPR] = 62;
+   set->last[NV_FILE_PRED] = 6;
+   set->last[NV_FILE_COND] = 1;
+
+   set->log2_unit[NV_FILE_GPR] = 2;
+   set->log2_unit[NV_FILE_COND] = 0;
+   set->log2_unit[NV_FILE_PRED] = 0;
+
+   set->pc = pc;
+}
+
+/* Insert @nval into the circular list @list, which is kept sorted by the
+ * start of the first live range; the search runs from the tail.
+ */
+static void
+insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+{
+   struct nv_value *elem;
+
+   for (elem = list->prev;
+        elem != list && elem->livei->bgn > nval->livei->bgn;
+        elem = elem->prev);
+   /* now elem begins before or at the same time as val */
+
+   nval->prev = elem;
+   nval->next = elem->next;
+   elem->next->prev = nval;
+   elem->next = nval;
+}
+
+/* Linear scan over all def'd values with a live interval, in order of
+ * interval start.  Values move between the active / inactive / handled
+ * lists as the scan position advances.  With @iter == 0 only vector
+ * definitions (nvi_vector_size() > 1) receive registers; with @iter != 0
+ * the remaining unassigned values do as well.  Aborts when a file runs out
+ * of registers.
+ */
+static int
+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+{
+   struct nv_instruction *i;
+   struct register_set f, free;
+   int k, n;
+   struct nv_value *cur, *val, *tmp[2];
+   struct nv_value active, inactive, handled, unhandled;
+
+   make_empty_list(&active);
+   make_empty_list(&inactive);
+   make_empty_list(&handled);
+   make_empty_list(&unhandled);
+
+   nvc0_ctor_register_set(ctx->pc, &free);
+
+   /* joined values should have range = NULL and thus not be added;
+    * also, fixed memory values won't be added because they're not
+    * def'd, just used
+    */
+   for (n = 0; n < ctx->num_insns; ++n) {
+      i = ctx->insns[n];
+
+      for (k = 0; k < 5; ++k) {
+         if (i->def[k] && i->def[k]->livei)
+            insert_ordered_tail(&unhandled, i->def[k]);
+         else
+         if (0 && i->def[k])
+            debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+      }
+   }
+
+   /* sanity check: only representatives, sorted by interval start */
+   for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+      assert(val->join == val);
+      assert(val->livei->bgn <= val->next->livei->bgn);
+   }
+
+   foreach_s(cur, tmp[0], &unhandled) {
+      remove_from_list(cur);
+
+      /* retire or suspend active values that do not cover cur's start */
+      foreach_s(val, tmp[1], &active) {
+         if (livei_end(val) <= cur->livei->bgn) {
+            reg_release(&free, val);
+            move_to_head(&handled, val);
+         } else
+         if (!livei_contains(val, cur->livei->bgn)) {
+            reg_release(&free, val);
+            move_to_head(&inactive, val);
+         }
+      }
+
+      /* retire or reactivate inactive values */
+      foreach_s(val, tmp[1], &inactive) {
+         if (livei_end(val) <= cur->livei->bgn)
+            move_to_head(&handled, val);
+         else
+         if (livei_contains(val, cur->livei->bgn)) {
+            reg_occupy(&free, val);
+            move_to_head(&active, val);
+         }
+      }
+
+      /* f = free minus everything that overlaps cur later on */
+      f = free;
+
+      foreach(val, &inactive)
+         if (livei_have_overlap(val, cur))
+            reg_occupy(&f, val);
+
+      foreach(val, &unhandled)
+         if (val->reg.id >= 0 && livei_have_overlap(val, cur))
+            reg_occupy(&f, val);
+
+      if (cur->reg.id < 0) {
+         boolean mem = FALSE;
+         int v = nvi_vector_size(cur->insn);
+
+         if (v > 1)
+            mem = !reg_assign(&f, &cur->insn->def[0], v);
+         else
+         if (iter)
+            mem = !reg_assign(&f, &cur, 1);
+
+         if (mem) {
+            NOUVEAU_ERR("out of registers\n");
+            abort();
+         }
+      }
+      insert_at_head(&active, cur);
+      reg_occupy(&free, cur);
+   }
+
+   return 0;
+}
+
+/* Run register allocation for the function rooted at @root: phi MOV
+ * insertion, live sets, instruction ordering, live intervals, then
+ * alternating join and linear scan passes.  Returns 0 on success or the
+ * first non-zero pass result; the live intervals are released either way.
+ */
+static int
+nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
+{
+   struct nv_pc_pass *ctx;
+   int i, ret;
+
+   NOUVEAU_DBG("REGISTER ALLOCATION - entering\n");
+
+   ctx = CALLOC_STRUCT(nv_pc_pass);
+   if (!ctx)
+      return -1;
+   ctx->pc = pc;
+
+   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
+   if (!ctx->insns) {
+      FREE(ctx);
+      return -1;
+   }
+
+   pc->pass_seq++;
+   ret = pass_generate_phi_movs(ctx, root);
+   assert(!ret);
+   if (ret)
+      goto out;
+
+   for (i = 0; i < pc->loop_nesting_bound; ++i) {
+      pc->pass_seq++;
+      ret = pass_build_live_sets(ctx, root);
+      assert(!ret && "live sets");
+      if (ret) {
+         NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
+         goto out;
+      }
+   }
+
+   pc->pass_seq++;
+   nvc0_pc_pass_in_order(root, pass_order_instructions, ctx);
+
+   pc->pass_seq++;
+   ret = pass_build_intervals(ctx, root);
+   assert(!ret && "build intervals");
+   if (ret) {
+      NOUVEAU_ERR("failed to build live intervals\n");
+      goto out;
+   }
+
+#ifdef NVC0_RA_DEBUG_LIVEI
+   for (i = 0; i < pc->num_values; ++i)
+      livei_print(&pc->values[i]);
+#endif
+
+   ret = pass_join_values(ctx, 0);
+   if (ret)
+      goto out;
+   ret = pass_linear_scan(ctx, 0);
+   if (ret)
+      goto out;
+   ret = pass_join_values(ctx, 1);
+   if (ret)
+      goto out;
+   ret = pass_join_values(ctx, 2);
+   if (ret)
+      goto out;
+   ret = pass_linear_scan(ctx, 1);
+   if (ret)
+      goto out;
+
+   NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n");
+
+out:
+   /* Release the intervals on every exit path and clear the list heads:
+    * this function runs once per subroutine over the same value array, so
+    * a stale pointer would be read and freed again by the next run.
+    */
+   for (i = 0; i < pc->num_values; ++i) {
+      livei_release(&pc->values[i]);
+      pc->values[i].livei = NULL;
+   }
+
+   FREE(ctx->insns);
+   FREE(ctx);
+   return ret;
+}
+
+/* Run register allocation for the main function and every subroutine of
+ * @pc; stops at and returns the first non-zero result.
+ */
+int
+nvc0_pc_exec_pass1(struct nv_pc *pc)
+{
+   int i, ret;
+
+   for (i = 0; i < pc->num_subroutines + 1; ++i)
+      if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
+         return ret;
+   return 0;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
new file mode 100644
index 0000000000..e159b7161d
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -0,0 +1,657 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" + +#define NOUVEAU_DEBUG + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" + +#include "nvc0_context.h" +#include "nvc0_pc.h" + +static unsigned +nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) +{ +   unsigned mask = inst->Dst[0].Register.WriteMask; + +   switch (inst->Instruction.Opcode) { +   case TGSI_OPCODE_COS: +   case TGSI_OPCODE_SIN: +      return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); +   case TGSI_OPCODE_DP3: +      return 0x7; +   case TGSI_OPCODE_DP4: +   case TGSI_OPCODE_DPH: +   case TGSI_OPCODE_KIL: /* WriteMask ignored */ +      return 0xf; +   case TGSI_OPCODE_DST: +      return mask & (c ? 0xa : 0x6); +   case TGSI_OPCODE_EX2: +   case TGSI_OPCODE_EXP: +   case TGSI_OPCODE_LG2: +   case TGSI_OPCODE_LOG: +   case TGSI_OPCODE_POW: +   case TGSI_OPCODE_RCP: +   case TGSI_OPCODE_RSQ: +   case TGSI_OPCODE_SCS: +      return 0x1; +   case TGSI_OPCODE_IF: +      return 0x1; +   case TGSI_OPCODE_LIT: +      return 0xb; +   case TGSI_OPCODE_TEX: +   case TGSI_OPCODE_TXB: +   case TGSI_OPCODE_TXL: +   case TGSI_OPCODE_TXP: +   { +      const struct tgsi_instruction_texture *tex; + +      assert(inst->Instruction.Texture); +      tex = &inst->Texture; + +      mask = 0x7; +      if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && +          inst->Instruction.Opcode != TGSI_OPCODE_TXD) +         mask |= 0x8; /* bias, lod or proj */ + +      switch (tex->Texture) { +      case TGSI_TEXTURE_1D: +         mask &= 0x9; +         break; +      case TGSI_TEXTURE_SHADOW1D: +         mask &= 0x5; +         break; +      case TGSI_TEXTURE_2D: +         mask &= 0xb; +         break; +      default: +         break; +      } +   } +  	   return mask; +   case TGSI_OPCODE_XPD: +   { +      unsigned x = 0; +      if (mask & 1) x |= 0x6; +      if (mask & 2) x |= 0x5; +      if (mask & 4) x |= 0x3; +      return x; +   } +   default: +      
break; +   } + +   return mask; +} + +static void +nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id) +{ +   int i, c; + +   for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) +      for (c = 0; c < 4; ++c) +         ti->input_access[i][c] = id; + +   ti->indirect_inputs = TRUE; +} + +static void +nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id) +{ +   int i, c; + +   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) +      for (c = 0; c < 4; ++c) +         ti->output_access[i][c] = id; + +   ti->indirect_outputs = TRUE; +} + +static INLINE unsigned +nvc0_system_value_location(unsigned sn, unsigned si) +{ +   switch (sn) { +      /* +   case TGSI_SEMANTIC_VERTEXID: +      return 0x2fc; +      */ +   case TGSI_SEMANTIC_PRIMID: +      return 0x60; +      /* +   case TGSI_SEMANTIC_LAYER_INDEX: +      return 0x64; +   case TGSI_SEMANTIC_VIEWPORT_INDEX: +      return 0x68; +      */ +   case TGSI_SEMANTIC_INSTANCEID: +      return 0x2f8; +   case TGSI_SEMANTIC_FACE: +      return 0x3fc; +   default: +      assert(0); +      return 0x000; +   } +} + +static INLINE unsigned +nvc0_varying_location(unsigned sn, unsigned si) +{ +   switch (sn) { +   case TGSI_SEMANTIC_POSITION: +      return 0x70; +   case TGSI_SEMANTIC_COLOR: +      return 0x280 + (si * 16); /* are these hard-wired ? 
*/ +   case TGSI_SEMANTIC_BCOLOR: +      return 0x2a0 + (si * 16); +   case TGSI_SEMANTIC_FOG: +      return 0x270; +   case TGSI_SEMANTIC_PSIZE: +      return 0x6c; +      /* +   case TGSI_SEMANTIC_PNTC: +      return 0x2e0; +      */ +   case TGSI_SEMANTIC_GENERIC: +      assert(si < 31); +      return 0x80 + (si * 16); +   case TGSI_SEMANTIC_NORMAL: +      return 0x360; +   case TGSI_SEMANTIC_PRIMID: +      return 0x40; +   case TGSI_SEMANTIC_FACE: +      return 0x3fc; +      /* +   case TGSI_SEMANTIC_CLIP_DISTANCE: +      return 0x2c0 + (si * 4); +      */ +   default: +      assert(0); +      return 0x000; +   } +} + +static INLINE unsigned +nvc0_interp_mode(const struct tgsi_full_declaration *decl) +{ +   unsigned mode; + +   if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) +      mode = NVC0_INTERP_FLAT; +   else +   if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) +      mode = NVC0_INTERP_PERSPECTIVE; +   else +      mode = NVC0_INTERP_LINEAR; + +   if (decl->Declaration.Centroid) +      mode |= NVC0_INTERP_CENTROID; + +   return mode; +} + +static void +prog_immediate(struct nvc0_translation_info *ti, +               const struct tgsi_full_immediate *imm) +{ +   int c; +   unsigned n = ti->immd32_nr++; + +   assert(ti->immd32_nr <= ti->scan.immediate_count); + +   for (c = 0; c < 4; ++c) +      ti->immd32[n * 4 + c] = imm->u[c].Uint; + +   ti->immd32_ty[n] = imm->Immediate.DataType; +} + +static boolean +prog_decl(struct nvc0_translation_info *ti, +          const struct tgsi_full_declaration *decl) +{ +   unsigned i, c; +   unsigned sn = TGSI_SEMANTIC_GENERIC; +   unsigned si = 0; +   const unsigned first = decl->Range.First; +   const unsigned last = decl->Range.Last; + +   if (decl->Declaration.Semantic) { +      sn = decl->Semantic.Name; +      si = decl->Semantic.Index; +   } +    +   switch (decl->Declaration.File) { +   case TGSI_FILE_INPUT: +      for (i = first; i <= last; ++i) { +         if (ti->prog->type == 
PIPE_SHADER_VERTEX) { +            sn = TGSI_SEMANTIC_GENERIC; +            si = i; +         } +         for (c = 0; c < 4; ++c) +            ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + +         if (ti->prog->type == PIPE_SHADER_FRAGMENT) +            ti->interp_mode[i] = nvc0_interp_mode(decl); +      } +      break; +   case TGSI_FILE_OUTPUT: +      for (i = first; i <= last; ++i, ++si) { +         if (ti->prog->type == PIPE_SHADER_FRAGMENT) { +            si = i; +            if (i == ti->fp_depth_output) { +               ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4; +            } else { +               if (i > ti->fp_depth_output) +                  si -= 1; +               for (c = 0; c < 4; ++c) +                  ti->output_loc[i][c] = si * 4 + c; +            } +         } else { +            for (c = 0; c < 4; ++c) +               ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; +         } +      } +      break; +   case TGSI_FILE_SYSTEM_VALUE: +      ti->sysval_loc[i] = nvc0_system_value_location(sn, si); +      assert(first == last); +      break; +   case TGSI_FILE_NULL: +   case TGSI_FILE_CONSTANT: +   case TGSI_FILE_TEMPORARY: +   case TGSI_FILE_SAMPLER: +   case TGSI_FILE_ADDRESS: +   case TGSI_FILE_IMMEDIATE: +   case TGSI_FILE_PREDICATE: +      break; +   default: +      NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File); +      return FALSE; +   } +   return TRUE; +} + +static void +prog_inst(struct nvc0_translation_info *ti, +          const struct tgsi_full_instruction *inst, int id) +{ +   const struct tgsi_dst_register *dst; +   const struct tgsi_src_register *src; +   int s, c, k; +   unsigned mask; + +   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { +      ti->subr[ti->num_subrs].first_insn = id - 1; +      ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */ +      ++ti->num_subrs; +   } + +   if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { +      
dst = &inst->Dst[0].Register; + +      for (c = 0; c < 4; ++c) { +         if (dst->Indirect) +            nvc0_indirect_outputs(ti, id); +         if (!(dst->WriteMask & (1 << c))) +            continue; +         ti->output_access[dst->Index][c] = id; +      } + +      if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && +          inst->Src[0].Register.File == TGSI_FILE_INPUT && +          dst->Index == ti->edgeflag_out) +         ti->prog->vp.edgeflag = inst->Src[0].Register.Index; +   } else +   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { +      if (inst->Dst[0].Register.Indirect) +         ti->require_stores = TRUE; +   } + +   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { +      src = &inst->Src[s].Register; +      if (src->File == TGSI_FILE_TEMPORARY) +         if (inst->Src[s].Register.Indirect) +            ti->require_stores = TRUE; +      if (src->File != TGSI_FILE_INPUT) +         continue; +      mask = nvc0_tgsi_src_mask(inst, s); + +      if (inst->Src[s].Register.Indirect) +         nvc0_indirect_inputs(ti, id); + +      for (c = 0; c < 4; ++c) { +         if (!(mask & (1 << c))) +            continue; +         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); +         if (k <= TGSI_SWIZZLE_W) +            ti->input_access[src->Index][k] = id; +      } +   } +} + +/* Probably should introduce something like struct tgsi_function_declaration + * instead of trying to guess inputs/outputs. 
+ */
+/* Accumulate, for one instruction of a subroutine, which TEMP components are
+ * read before the subroutine has written them (argv) and which components it
+ * writes (retv). Each [word][component] entry is a 32-bit mask over register
+ * indices.
+ */
+static void
+prog_subroutine_inst(struct nvc0_subroutine *subr,
+                     const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_dst_register *dst;
+   const struct tgsi_src_register *src;
+   int s, c, k;
+   unsigned mask;
+
+   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+      src = &inst->Src[s].Register;
+      if (src->File != TGSI_FILE_TEMPORARY)
+         continue;
+      mask = nvc0_tgsi_src_mask(inst, s);
+
+      for (c = 0; c < 4; ++c) {
+         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+
+         /* NOTE(review): '<' skips reads of the W component, whereas
+          * prog_inst() uses '<= TGSI_SWIZZLE_W' - confirm this is intended.
+          */
+         if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
+            /* 1u: the bit index may be 31, which overflows a signed int */
+            if (!(subr->retv[src->Index / 32][k] & (1u << (src->Index % 32))))
+               subr->argv[src->Index / 32][k] |= 1u << (src->Index % 32);
+      }
+   }
+
+   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+      dst = &inst->Dst[0].Register;
+
+      for (c = 0; c < 4; ++c)
+         if (dst->WriteMask & (1 << c))
+            subr->retv[dst->Index / 32][c] |= 1u << (dst->Index % 32);
+   }
+}
+
+/* Fill the header bits shared by vertex and geometry programs: one enable
+ * bit per accessed input component (from hdr[5]) and one per accessed output
+ * component (from hdr[13]). Always returns 0.
+ */
+static int
+nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+{
+   int i, c;
+   unsigned a;
+
+   for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+      for (c = 0; c < 4; ++c, ++a)
+         if (ti->input_access[i][c])
+            vp->hdr[5 + a / 32] |= 1u << (a % 32); /* VP_ATTR_EN */
+   }
+
+   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
+      a = (ti->output_loc[i][0] - 0x40) / 4;
+      for (c = 0; c < 4; ++c, ++a) {
+         if (!ti->output_access[i][c])
+            continue;
+         vp->hdr[13 + a / 32] |= 1u << (a % 32); /* VP_EXPORT_EN */
+      }
+   }
+
+   return 0;
+}
+
+/* Build the vertex program header; hdr[18] gets one bit per user clip plane. */
+static int
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+{
+   vp->hdr[0] = 0x20461;
+   vp->hdr[4] = 0xff000;
+
+   vp->hdr[18] = (1 << vp->vp.num_ucps) - 1;
+
+   return nvc0_vp_gp_gen_header(vp, ti);
+}
+
+static int 
+nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
+{
+   /* Defaults for shaders that do not declare the properties: previously
+    * both variables were read uninitialized in that case. ~0u matches no
+    * PIPE_PRIM_* case below and therefore reaches the assert.
+    */
+   unsigned max_output_verts = 0, output_prim = ~0u;
+   unsigned i;
+
+   gp->hdr[0] = 0x00021061;
+   gp->hdr[2] = 0x01000000;
+
+   for (i = 0; i < ti->scan.num_properties; ++i) {
+      switch (ti->scan.properties[i].name) {
+      case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+         output_prim = ti->scan.properties[i].data[0];
+         break;
+      case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+         max_output_verts = ti->scan.properties[i].data[0];
+         break;
+      default:
+         break;
+      }
+   }
+
+   switch (output_prim) {
+   case PIPE_PRIM_POINTS:
+      gp->hdr[3] = 0x01000000;
+      gp->hdr[0] |= 0xf0000000;
+      break;
+   case PIPE_PRIM_LINE_STRIP:
+      gp->hdr[3] = 0x06000000;
+      gp->hdr[0] |= 0x10000000;
+      break;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      gp->hdr[3] = 0x07000000;
+      gp->hdr[0] |= 0x10000000;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   gp->hdr[4] = max_output_verts & 0x1ff;
+
+   return nvc0_vp_gp_gen_header(gp, ti);
+}
+
+/* Build the fragment program header: kill / multiple-colour-output flags in
+ * hdr[0], depth output flag in hdr[19], per-input interpolation modes from
+ * hdr[4] on, and a 4-bit write mask per colour output in hdr[18].
+ * Always returns 0.
+ */
+static int
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
+{
+   int i, c;
+   unsigned a, m;
+
+   fp->hdr[0] = 0x21462;
+   fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
+
+   if (ti->scan.uses_kill)
+      fp->hdr[0] |= 0x8000;
+   if (ti->scan.writes_z) {
+      fp->hdr[19] |= 0x2;
+      if (ti->scan.num_outputs > 2)
+         fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+   } else {
+      /* Same flag as in the branch above; this used to set 0x8000, which
+       * is the kill bit, instead of the multiple-colour-outputs bit.
+       */
+      if (ti->scan.num_outputs > 1)
+         fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+   }
+
+   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+      m = ti->interp_mode[i];
+      for (c = 0; c < 4; ++c) {
+         if (!ti->input_access[i][c])
+            continue;
+         a = ti->input_loc[i][c] / 2;
+         if ((a & ~7) == 0x70/2)
+            fp->hdr[5] |= 1u << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK 
*/ +         else +            fp->hdr[4 + a / 32] |= m << (a % 32); +      } +   } + +   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { +      if (i != ti->fp_depth_output) +         fp->hdr[18] |= 0xf << ti->output_loc[i][0]; +   } + +   return 0; +} + +static boolean +nvc0_prog_scan(struct nvc0_translation_info *ti) +{ +   struct nvc0_program *prog = ti->prog; +   struct tgsi_parse_context parse; +   int ret; +   unsigned i; + +#ifdef NOUVEAU_DEBUG +   tgsi_dump(prog->pipe.tokens, 0); +#endif + +   tgsi_scan_shader(prog->pipe.tokens, &ti->scan); + +   if (ti->prog->type == PIPE_SHADER_FRAGMENT) { +      ti->fp_depth_output = 255; +      for (i = 0; i < ti->scan.num_outputs; ++i) +         if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION) +            ti->fp_depth_output = i; +   } + +   ti->subr = +      CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); + +   ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); +   ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); + +   ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0])); + +   tgsi_parse_init(&parse, prog->pipe.tokens); +   while (!tgsi_parse_end_of_tokens(&parse)) { +      tgsi_parse_token(&parse); + +      switch (parse.FullToken.Token.Type) { +      case TGSI_TOKEN_TYPE_IMMEDIATE: +         prog_immediate(ti, &parse.FullToken.FullImmediate); +         break; +      case TGSI_TOKEN_TYPE_DECLARATION: +         prog_decl(ti, &parse.FullToken.FullDeclaration); +         break; +      case TGSI_TOKEN_TYPE_INSTRUCTION: +         ti->insns[ti->num_insns] = parse.FullToken.FullInstruction; +         prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns); +         break; +      default: +         break; +      } +   } + +   for (i = 0; i < ti->num_subrs; ++i) { +      unsigned pc = ti->subr[i].id; +      while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) +         
prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); +   } + +   switch (prog->type) { +   case PIPE_SHADER_VERTEX: +      ti->input_file = NV_FILE_MEM_A; +      ti->output_file = NV_FILE_MEM_V; +      ret = nvc0_vp_gen_header(prog, ti); +      break; +      /* +   case PIPE_SHADER_TESSELLATION_CONTROL: +      ret = nvc0_tcp_gen_header(ti); +      break; +   case PIPE_SHADER_TESSELLATION_EVALUATION: +      ret = nvc0_tep_gen_header(ti); +      break; +   case PIPE_SHADER_GEOMETRY: +      ret = nvc0_gp_gen_header(ti); +      break; +      */ +   case PIPE_SHADER_FRAGMENT: +      ti->input_file = NV_FILE_MEM_V; +      ti->output_file = NV_FILE_GPR; +      ret = nvc0_fp_gen_header(prog, ti); +      break; +   default: +      assert(!"unsupported program type"); +      ret = -1; +      break; +   } + +   assert(!ret); +   return ret; +} + +boolean +nvc0_program_translate(struct nvc0_program *prog) +{ +   struct nvc0_translation_info *ti; +   int ret; + +   ti = CALLOC_STRUCT(nvc0_translation_info); +   ti->prog = prog; + +   ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; + +   if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps) +      ti->append_ucp = TRUE; + +   ret = nvc0_prog_scan(ti); +   if (ret) { +      NOUVEAU_ERR("unsupported shader program\n"); +      goto out; +   } + +   ret = nvc0_generate_code(ti); +   if (ret) +      NOUVEAU_ERR("shader translation failed\n"); + +   { +      unsigned i; +      for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i) +         debug_printf("HDR[%02lx] = 0x%08x\n", +                      i * sizeof(prog->hdr[0]), prog->hdr[i]); +   } + +out: +   if (ti->immd32) +      FREE(ti->immd32); +   if (ti->immd32_ty) +      FREE(ti->immd32_ty); +   if (ti->insns) +      FREE(ti->insns); +   if (ti->subr) +      FREE(ti->subr); +   FREE(ti); +   return ret ? 
FALSE : TRUE; +} + +void +nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ +   if (prog->res) +      nouveau_resource_free(&prog->res); + +   if (prog->code) +      FREE(prog->code); +   if (prog->relocs) +      FREE(prog->relocs); + +   memset(prog->hdr, 0, sizeof(prog->hdr)); + +   prog->translated = FALSE; +} diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h new file mode 100644 index 0000000000..1271303144 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -0,0 +1,84 @@ + +#ifndef __NVC0_PROGRAM_H__ +#define __NVC0_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +#define NVC0_CAP_MAX_PROGRAM_TEMPS 64 + +#define NVC0_SHADER_HEADER_SIZE (20 * 4) + +struct nvc0_program { +   struct pipe_shader_state pipe; + +   ubyte type; +   boolean translated; +   ubyte max_gpr; + +   uint32_t *code; +   unsigned code_base; +   unsigned code_size; +   unsigned parm_size; + +   uint32_t hdr[20]; + +   uint32_t flags[2]; /* FP_ZORDER */ + +   struct { +      uint8_t edgeflag; +      uint8_t num_ucps; +   } vp; + +   void *relocs; +   unsigned num_relocs; + +   struct nouveau_resource *res; +}; + +/* first 2 bits are written into the program header, for each input */ +#define NVC0_INTERP_FLAT          (1 << 0) +#define NVC0_INTERP_PERSPECTIVE   (2 << 0) +#define NVC0_INTERP_LINEAR        (3 << 0) +#define NVC0_INTERP_CENTROID      (1 << 2) + +/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */ +struct nvc0_subroutine { +   unsigned id; +   unsigned first_insn; +   uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; +   uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; +}; + +struct nvc0_translation_info { +   struct nvc0_program *prog; +   struct tgsi_full_instruction *insns; +   unsigned num_insns; +   ubyte input_file; +   ubyte output_file; +   ubyte fp_depth_output; +   uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4]; +   uint16_t 
output_loc[PIPE_MAX_SHADER_OUTPUTS][4]; +   uint16_t sysval_loc[TGSI_SEMANTIC_COUNT]; +   int input_access[PIPE_MAX_SHADER_INPUTS][4]; +   int output_access[PIPE_MAX_SHADER_OUTPUTS][4]; +   ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; +   boolean indirect_inputs; +   boolean indirect_outputs; +   boolean require_stores; +   uint32_t *immd32; +   ubyte *immd32_ty; +   unsigned immd32_nr; +   ubyte edgeflag_out; +   struct nvc0_subroutine *subr; +   unsigned num_subrs; +   boolean append_ucp; +   struct tgsi_shader_info scan; +}; + +int nvc0_generate_code(struct nvc0_translation_info *); + +void nvc0_relocate_program(struct nvc0_program *, +                           uint32_t code_base, uint32_t data_base); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c new file mode 100644 index 0000000000..8cb05cdd09 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -0,0 +1,278 @@ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nvc0_3d.xml.h" + +struct push_context { +   struct nouveau_channel *chan; + +   void *idxbuf; + +   float edgeflag; +   int edgeflag_attr; + +   uint32_t vertex_words; +   uint32_t packet_vertex_limit; + +   struct translate *translate; + +   boolean primitive_restart; +   uint32_t prim; +   uint32_t restart_index; +}; + +static INLINE unsigned +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) +{ +   unsigned i; +   for (i = 0; i < push; ++i) +      if (elts[i] == index) +         break; +   return i; +} + +static INLINE unsigned +prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) +{ +   unsigned i; +   for (i = 0; i < push; ++i) +      if (elts[i] == index) +         break; +   return i; +} + +static INLINE unsigned +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) +{ + 
  unsigned i; +   for (i = 0; i < push; ++i) +      if (elts[i] == index) +         break; +   return i; +} + +static void +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ +   uint8_t *elts = (uint8_t *)ctx->idxbuf + start; + +   while (count) { +      unsigned push = MIN2(count, ctx->packet_vertex_limit); +      unsigned size, nr; + +      nr = push; +      if (ctx->primitive_restart) +         nr = prim_restart_search_i08(elts, push, ctx->restart_index); + +      size = ctx->vertex_words * nr; + +      BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + +      ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur); + +      ctx->chan->cur += size; +      count -= push; +      elts += push; + +      if (nr != push) { +         BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); +         OUT_RING  (ctx->chan, 0); +         OUT_RING  (ctx->chan, ctx->prim); +      } +   } +} + +static void +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) +{ +   uint16_t *elts = (uint16_t *)ctx->idxbuf + start; + +   while (count) { +      unsigned push = MIN2(count, ctx->packet_vertex_limit); +      unsigned size, nr; + +      nr = push; +      if (ctx->primitive_restart) +         nr = prim_restart_search_i16(elts, push, ctx->restart_index); + +      size = ctx->vertex_words * nr; + +      BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + +      ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur); + +      ctx->chan->cur += size; +      count -= push; +      elts += push; + +      if (nr != push) { +         BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); +         OUT_RING  (ctx->chan, 0); +         OUT_RING  (ctx->chan, ctx->prim); +      } +   } +} + +static void +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ +   uint32_t *elts = (uint32_t *)ctx->idxbuf + start; + +   while (count) { +      unsigned push = MIN2(count, 
ctx->packet_vertex_limit); +      unsigned size, nr; + +      nr = push; +      if (ctx->primitive_restart) +         nr = prim_restart_search_i32(elts, push, ctx->restart_index); + +      size = ctx->vertex_words * nr; + +      BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + +      ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur); + +      ctx->chan->cur += size; +      count -= push; +      elts += push; + +      if (nr != push) { +         BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); +         OUT_RING  (ctx->chan, 0); +         OUT_RING  (ctx->chan, ctx->prim); +      } +   } +} + +static void +emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) +{ +   while (count) { +      unsigned push = MIN2(count, ctx->packet_vertex_limit); +      unsigned size = ctx->vertex_words * push; + +      BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + +      ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); +      ctx->chan->cur += size; +      count -= push; +      start += push; +   } +} + + +#define NVC0_PRIM_GL_CASE(n) \ +   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ +   switch (prim) { +   NVC0_PRIM_GL_CASE(POINTS); +   NVC0_PRIM_GL_CASE(LINES); +   NVC0_PRIM_GL_CASE(LINE_LOOP); +   NVC0_PRIM_GL_CASE(LINE_STRIP); +   NVC0_PRIM_GL_CASE(TRIANGLES); +   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); +   NVC0_PRIM_GL_CASE(TRIANGLE_FAN); +   NVC0_PRIM_GL_CASE(QUADS); +   NVC0_PRIM_GL_CASE(QUAD_STRIP); +   NVC0_PRIM_GL_CASE(POLYGON); +   NVC0_PRIM_GL_CASE(LINES_ADJACENCY); +   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); +   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); +   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); +   /* +   NVC0_PRIM_GL_CASE(PATCHES); */ +   default: +      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; +      break; +   } +} + +void +nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ +  
 struct push_context ctx; +   struct pipe_transfer *transfer = NULL; +   unsigned i, index_size; +   unsigned inst = info->instance_count; + +   ctx.chan = nvc0->screen->base.channel; +   ctx.translate = nvc0->vertex->translate; +   ctx.packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; +   ctx.vertex_words = nvc0->vertex->vtx_size; + +   for (i = 0; i < nvc0->num_vtxbufs; ++i) { +      uint8_t *data; +      struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; +      struct nvc0_resource *res = nvc0_resource(vb->buffer); + +      data = nvc0_resource_map_offset(res, vb->buffer_offset, NOUVEAU_BO_RD); +      if (info->indexed) +         data += info->index_bias * vb->stride; + +      ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); +   } + +   if (info->indexed) { +      ctx.idxbuf = pipe_buffer_map(&nvc0->pipe, nvc0->idxbuf.buffer, +                                   PIPE_TRANSFER_READ, &transfer); +      if (!ctx.idxbuf) +         return; +      ctx.idxbuf = (uint8_t *)ctx.idxbuf + nvc0->idxbuf.offset; + +      index_size = nvc0->idxbuf.index_size; +      ctx.primitive_restart = info->primitive_restart; +      ctx.restart_index = info->restart_index; +   } else { +      ctx.idxbuf = NULL; +      index_size = 0; +      ctx.primitive_restart = FALSE; +      ctx.restart_index = 0; +   } + +   ctx.prim = nvc0_prim_gl(info->mode); + +   while (inst--) { +      BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1); +      OUT_RING  (ctx.chan, ctx.prim); +      switch (index_size) { +      case 0: +         emit_vertices_seq(&ctx, info->start, info->count); +         break; +      case 1: +         emit_vertices_i08(&ctx, info->start, info->count); +         break; +      case 2: +         emit_vertices_i16(&ctx, info->start, info->count); +         break; +      case 4: +         emit_vertices_i32(&ctx, info->start, info->count); +         break; +      default: +         assert(0); +         break; +      } +      INLIN_RING(ctx.chan, 
RING_3D(VERTEX_END_GL), 0); + +      ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; +   } + +   if (info->indexed) +	   pipe_buffer_unmap(&nvc0->pipe, nvc0->idxbuf.buffer, transfer); + +   for (i = 0; i < nvc0->num_vtxbufs; ++i) { +      struct nvc0_resource *res = nvc0_resource(nvc0->vtxbuf[i].buffer); + +      if (res->bo) +         nouveau_bo_unmap(res->bo); +   } +} diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c new file mode 100644 index 0000000000..07dd7b56b9 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_push2.c @@ -0,0 +1,329 @@ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nvc0_3d.xml.h" + +struct push_context { +   struct nvc0_context *nvc0; + +   uint vertex_size; + +   void *idxbuf; +   uint idxsize; + +   float edgeflag; +   int edgeflag_input; + +   struct { +      void *map; +      void (*push)(struct nouveau_channel *, void *); +      uint32_t stride; +      uint32_t divisor; +      uint32_t step; +   } attr[32]; +   int num_attrs; +}; + +static void +emit_b32_1(struct nouveau_channel *chan, void *data) +{ +   uint32_t *v = data; + +   OUT_RING(chan, v[0]); +} + +static void +emit_b32_2(struct nouveau_channel *chan, void *data) +{ +   uint32_t *v = data; + +   OUT_RING(chan, v[0]); +   OUT_RING(chan, v[1]); +} + +static void +emit_b32_3(struct nouveau_channel *chan, void *data) +{ +   uint32_t *v = data; + +   OUT_RING(chan, v[0]); +   OUT_RING(chan, v[1]); +   OUT_RING(chan, v[2]); +} + +static void +emit_b32_4(struct nouveau_channel *chan, void *data) +{ +   uint32_t *v = data; + +   OUT_RING(chan, v[0]); +   OUT_RING(chan, v[1]); +   OUT_RING(chan, v[2]); +   OUT_RING(chan, v[3]); +} + +static void +emit_b16_1(struct nouveau_channel *chan, void *data) +{ +   uint16_t *v = data; + +   OUT_RING(chan, v[0]); +} + +static void 
+emit_b16_3(struct nouveau_channel *chan, void *data) +{ +   uint16_t *v = data; + +   OUT_RING(chan, (v[1] << 16) | v[0]); +   OUT_RING(chan, v[2]); +} + +static void +emit_b08_1(struct nouveau_channel *chan, void *data) +{ +   uint8_t *v = data; + +   OUT_RING(chan, v[0]); +} + +static void +emit_b08_3(struct nouveau_channel *chan, void *data) +{ +   uint8_t *v = data; + +   OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); +} + +static void +emit_b64_1(struct nouveau_channel *chan, void *data) +{ +   double *v = data; + +   OUT_RINGf(chan, v[0]); +} + +static void +emit_b64_2(struct nouveau_channel *chan, void *data) +{ +   double *v = data; + +   OUT_RINGf(chan, v[0]); +   OUT_RINGf(chan, v[1]); +} + +static void +emit_b64_3(struct nouveau_channel *chan, void *data) +{ +   double *v = data; + +   OUT_RINGf(chan, v[0]); +   OUT_RINGf(chan, v[1]); +   OUT_RINGf(chan, v[2]); +} + +static void +emit_b64_4(struct nouveau_channel *chan, void *data) +{ +   double *v = data; + +   OUT_RINGf(chan, v[0]); +   OUT_RINGf(chan, v[1]); +   OUT_RINGf(chan, v[2]); +   OUT_RINGf(chan, v[3]);    +} + +static INLINE void +emit_vertex(struct push_context *ctx, unsigned n) +{ +   struct nouveau_channel *chan = ctx->nvc0->screen->base.channel; +   int i; + +   if (ctx->edgeflag_input < 32) { +      /* TODO */ +   } + +   BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size); +   for (i = 0; i < ctx->num_attrs; ++i) +      ctx->attr[i].push(chan, +                        (uint8_t *)ctx->attr[i].map + n * ctx->attr[i].stride); +} + +static void +emit_edgeflag(struct push_context *ctx, boolean enabled) +{ +   struct nouveau_channel *chan = ctx->nvc0->screen->base.channel; +    +   INLIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), enabled); +} + +static void +emit_elt08(struct push_context *ctx, unsigned start, unsigned count) +{ +   uint8_t *idxbuf = ctx->idxbuf; + +   while (count--) +      emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_elt16(struct push_context *ctx, 
unsigned start, unsigned count) +{ +   uint16_t *idxbuf = ctx->idxbuf; + +   while (count--) +      emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_elt32(struct push_context *ctx, unsigned start, unsigned count) +{ +   uint32_t *idxbuf = ctx->idxbuf; + +   while (count--) +      emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_seq(struct push_context *ctx, unsigned start, unsigned count) +{ +   while (count--) +      emit_vertex(ctx, start++); +} + +#define NVC0_PRIM_GL_CASE(n) \ +   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ +   switch (prim) { +   NVC0_PRIM_GL_CASE(POINTS); +   NVC0_PRIM_GL_CASE(LINES); +   NVC0_PRIM_GL_CASE(LINE_LOOP); +   NVC0_PRIM_GL_CASE(LINE_STRIP); +   NVC0_PRIM_GL_CASE(TRIANGLES); +   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); +   NVC0_PRIM_GL_CASE(TRIANGLE_FAN); +   NVC0_PRIM_GL_CASE(QUADS); +   NVC0_PRIM_GL_CASE(QUAD_STRIP); +   NVC0_PRIM_GL_CASE(POLYGON); +   NVC0_PRIM_GL_CASE(LINES_ADJACENCY); +   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); +   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); +   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); +   /* +   NVC0_PRIM_GL_CASE(PATCHES); */ +   default: +      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; +      break; +   } +} + +void +nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ +   struct push_context ctx; +   unsigned i, n; +   unsigned inst = info->instance_count; +   unsigned prim = nvc0_prim_gl(info->mode); + +   ctx.nvc0 = nvc0; +   ctx.vertex_size = nvc0->vertex->vtx_size; +   ctx.idxbuf = NULL; +   ctx.num_attrs = 0; +   ctx.edgeflag = 0.5f; +   ctx.edgeflag_input = 32; + +   for (i = 0; i < nvc0->vertex->num_elements; ++i) { +      struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; +      struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; +      struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo; +      unsigned nr_components; + 
+      if (!(nvc0->vbo_fifo & (1 << i))) +         continue; +      n = ctx.num_attrs++; + +      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) +         return; +      ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset; + +      nouveau_bo_unmap(bo); + +      ctx.attr[n].stride = vb->stride; +      ctx.attr[n].divisor = ve->instance_divisor; + +      nr_components = util_format_get_nr_components(ve->src_format); +      switch (util_format_get_component_bits(ve->src_format, +                                             UTIL_FORMAT_COLORSPACE_RGB, 0)) { +      case 8: +         switch (nr_components) { +         case 1: ctx.attr[n].push = emit_b08_1; break; +         case 2: ctx.attr[n].push = emit_b16_1; break; +         case 3: ctx.attr[n].push = emit_b08_3; break; +         case 4: ctx.attr[n].push = emit_b32_1; break; +         } +         break; +      case 16: +         switch (nr_components) { +         case 1: ctx.attr[n].push = emit_b16_1; break; +         case 2: ctx.attr[n].push = emit_b32_1; break; +         case 3: ctx.attr[n].push = emit_b16_3; break; +         case 4: ctx.attr[n].push = emit_b32_2; break; +         } +         break; +      case 32: +         switch (nr_components) { +         case 1: ctx.attr[n].push = emit_b32_1; break; +         case 2: ctx.attr[n].push = emit_b32_2; break; +         case 3: ctx.attr[n].push = emit_b32_3; break; +         case 4: ctx.attr[n].push = emit_b32_4; break; +         } +         break; +      default: +         assert(0); +         break; +      } +   } + +   if (info->indexed) { +      struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer); +      if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD)) +         return; +      ctx.idxbuf = (uint8_t *)res->bo->map + nvc0->idxbuf.offset + res->offset; +      nouveau_bo_unmap(res->bo); +      ctx.idxsize = nvc0->idxbuf.index_size; +   } else { +      ctx.idxsize = 0; +   } + +   while (inst--) { +      
BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1); +      OUT_RING  (nvc0->screen->base.channel, prim); +      switch (ctx.idxsize) { +      case 0: +         emit_seq(&ctx, info->start, info->count); +         break; +      case 1: +         emit_elt08(&ctx, info->start, info->count); +         break; +      case 2: +         emit_elt16(&ctx, info->start, info->count); +         break; +      case 4: +         emit_elt32(&ctx, info->start, info->count); +         break; +      } +      INLIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0); + +      prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; +   } +} diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c new file mode 100644 index 0000000000..181d917f22 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_resource.c @@ -0,0 +1,71 @@ + +#include "pipe/p_context.h" +#include "nvc0_resource.h" +#include "nouveau/nouveau_screen.h" + +static unsigned +nvc0_resource_is_referenced(struct pipe_context *pipe, +                            struct pipe_resource *resource, +                            unsigned face, unsigned level) +{ +   struct nvc0_resource *res = nvc0_resource(resource); +   unsigned flags = 0; + +#ifdef NOUVEAU_USERSPACE_MM +   flags = res->status; +#else +   unsigned bo_flags = nouveau_bo_pending(res->bo); +   if (bo_flags & NOUVEAU_BO_RD) +      flags = PIPE_REFERENCED_FOR_READ; +   if (bo_flags & NOUVEAU_BO_WR) +      flags |= PIPE_REFERENCED_FOR_WRITE; +#endif +   return flags; +} + +static struct pipe_resource * +nvc0_resource_create(struct pipe_screen *screen, +                     const struct pipe_resource *templ) +{ +   switch (templ->target) { +   case PIPE_BUFFER: +      return nvc0_buffer_create(screen, templ); +   default: +      return nvc0_miptree_create(screen, templ); +   } +} + +static struct pipe_resource * +nvc0_resource_from_handle(struct pipe_screen * screen, +                          const struct pipe_resource 
*templ, +                          struct winsys_handle *whandle) +{ +   if (templ->target == PIPE_BUFFER) +      return NULL; +   else +      return nvc0_miptree_from_handle(screen, templ, whandle); +} + +void +nvc0_init_resource_functions(struct pipe_context *pcontext) +{ +   pcontext->get_transfer = u_get_transfer_vtbl; +   pcontext->transfer_map = u_transfer_map_vtbl; +   pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; +   pcontext->transfer_unmap = u_transfer_unmap_vtbl; +   pcontext->transfer_destroy = u_transfer_destroy_vtbl; +   pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; +   pcontext->is_resource_referenced = nvc0_resource_is_referenced; +} + +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen) +{ +   pscreen->resource_create = nvc0_resource_create; +   pscreen->resource_from_handle = nvc0_resource_from_handle; +   pscreen->resource_get_handle = u_resource_get_handle_vtbl; +   pscreen->resource_destroy = u_resource_destroy_vtbl; +   pscreen->user_buffer_create = nvc0_user_buffer_create; +   pscreen->get_tex_surface = nvc0_miptree_surface_new; +   pscreen->tex_surface_destroy = nvc0_miptree_surface_del; +} diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h new file mode 100644 index 0000000000..b9f3f7b5d8 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -0,0 +1,151 @@ + +#ifndef __NVC0_RESOURCE_H__ +#define __NVC0_RESOURCE_H__ + +#include "util/u_transfer.h" +#include "util/u_double_list.h" +#define NOUVEAU_NVC0 +#include "nouveau/nouveau_winsys.h" +#undef NOUVEAU_NVC0 + +#include "nvc0_fence.h" + +struct pipe_resource; +struct nouveau_bo; + +/* Resources, if mapped into the GPU's address space, are guaranteed to + * have constant virtual addresses. + * The address of a resource will lie within the nouveau_bo referenced, + * and this bo should be added to the memory manager's validation list. 
+ */ +struct nvc0_resource { +   struct pipe_resource base; +   const struct u_resource_vtbl *vtbl; +   uint64_t address; + +   uint8_t *data; +   struct nouveau_bo *bo; +   uint32_t offset; + +   uint8_t status; +   uint8_t domain; + +   int16_t score; /* low if mapped very often, if high can move to VRAM */ + +   struct nvc0_fence *fence; +   struct nvc0_fence *fence_wr; + +   struct nvc0_mm_allocation *mm; +}; + +/* XXX: wait for fence (atm only using this for vertex push) */ +static INLINE void * +nvc0_resource_map_offset(struct nvc0_resource *res, uint32_t offset, +                         uint32_t flags) +{ +   void *map; + +   if (res->domain == 0) +      return res->data + offset; + +   if (nouveau_bo_map_range(res->bo, res->offset + offset, +                            res->base.width0, flags | NOUVEAU_BO_NOSYNC)) +      return NULL; + +   /* With suballocation, the same bo can be mapped several times, so unmap +    * immediately. Maps are guaranteed to persist. */ +   map = res->bo->map; +   nouveau_bo_unmap(res->bo); +   return map; +} + +static INLINE void +nvc0_resource_unmap(struct nvc0_resource *res) +{ +   if (res->domain != 0 && 0) +      nouveau_bo_unmap(res->bo); +} + +#define NVC0_TILE_H(m) (8 << ((m >> 4) & 0xf)) +#define NVC0_TILE_D(m) (1 << (m >> 8)) + +struct nvc0_miptree_level { +   int *image_offset; +   uint32_t pitch; +   uint32_t tile_mode; +}; + +#define NVC0_MAX_TEXTURE_LEVELS 16 + +struct nvc0_miptree { +   struct nvc0_resource base; +   struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS]; +   int image_nr; +   int total_size; +}; + +static INLINE struct nvc0_miptree * +nvc0_miptree(struct pipe_resource *pt) +{ +   return (struct nvc0_miptree *)pt; +} + +static INLINE struct nvc0_resource * +nvc0_resource(struct pipe_resource *resource) +{ +   return (struct nvc0_resource *)resource; +} + +/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? 
*/ +static INLINE boolean +nvc0_resource_mapped_by_gpu(struct pipe_resource *resource) +{ +   return nvc0_resource(resource)->domain != 0; +} + +void +nvc0_init_resource_functions(struct pipe_context *pcontext); + +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); + +/* Internal functions: + */ +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, +                    const struct pipe_resource *tmp); + +struct pipe_resource * +nvc0_miptree_from_handle(struct pipe_screen *pscreen, +                         const struct pipe_resource *template, +                         struct winsys_handle *whandle); + +struct pipe_resource * +nvc0_buffer_create(struct pipe_screen *pscreen, +                   const struct pipe_resource *templ); + +struct pipe_resource * +nvc0_user_buffer_create(struct pipe_screen *screen, +                        void *ptr, +                        unsigned bytes, +                        unsigned usage); + + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, +                         unsigned face, unsigned level, unsigned zslice, +                         unsigned flags); + +void +nvc0_miptree_surface_del(struct pipe_surface *ps); + +struct nvc0_context; + +boolean +nvc0_buffer_migrate(struct nvc0_context *, +                    struct nvc0_resource *, unsigned domain); + +boolean +nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c new file mode 100644 index 0000000000..24937217c0 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -0,0 +1,661 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "util/u_format_s3tc.h" +#include "pipe/p_screen.h" + +#include "nvc0_fence.h" +#include "nvc0_context.h" +#include "nvc0_screen.h" + +#include "nouveau/nv_object.xml.h" +#include "nvc0_graph_macros.h" + +static boolean +nvc0_screen_is_format_supported(struct pipe_screen *pscreen, +                                enum pipe_format format, +                                enum pipe_texture_target target, +                                unsigned sample_count, +                                unsigned bindings, unsigned geom_flags) +{ +   if (sample_count > 1) +      return FALSE; + +   if (!util_format_s3tc_enabled) { +      switch (format) { +      case PIPE_FORMAT_DXT1_RGB: +      case PIPE_FORMAT_DXT1_RGBA: +      case PIPE_FORMAT_DXT3_RGBA: +      case PIPE_FORMAT_DXT5_RGBA: +         return FALSE; +      default: +         break; +      } +   } + +   /* transfers & shared are always supported */ +   bindings &= ~(PIPE_BIND_TRANSFER_READ | +                 PIPE_BIND_TRANSFER_WRITE | +                 PIPE_BIND_SHARED); + +   return (nvc0_format_table[format].usage & bindings) == bindings; +} + +static int 
+nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ +   switch (param) { +   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +   case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +      return 32; +   case PIPE_CAP_MAX_COMBINED_SAMPLERS: +      return 64; +   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +      return 13; +   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +      return 10; +   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +      return 13; +   case PIPE_CAP_TEXTURE_MIRROR_CLAMP: +   case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +   case PIPE_CAP_TEXTURE_SWIZZLE: +   case PIPE_CAP_TEXTURE_SHADOW_MAP: +   case PIPE_CAP_NPOT_TEXTURES: +   case PIPE_CAP_ANISOTROPIC_FILTER: +      return 1; +   case PIPE_CAP_TWO_SIDED_STENCIL: +   case PIPE_CAP_DEPTH_CLAMP: +   case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: +   case PIPE_CAP_POINT_SPRITE: +      return 1; +   case PIPE_CAP_GLSL: +   case PIPE_CAP_SM3: +      return 1; +   case PIPE_CAP_MAX_RENDER_TARGETS: +      return 8; +   case PIPE_CAP_OCCLUSION_QUERY: +      return 1; +   case PIPE_CAP_TIMER_QUERY: +   case PIPE_CAP_STREAM_OUTPUT: +      return 0; +   case PIPE_CAP_BLEND_EQUATION_SEPARATE: +   case PIPE_CAP_INDEP_BLEND_ENABLE: +   case PIPE_CAP_INDEP_BLEND_FUNC: +      return 1; +   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: +   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: +      return 1; +   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: +   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: +      return 0; +   case PIPE_CAP_SHADER_STENCIL_EXPORT: +      return 0; +   case PIPE_CAP_PRIMITIVE_RESTART: +      return 1; +   default: +      NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); +      return 0; +   } +} + +static int +nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, +                             enum pipe_shader_cap param) +{ +   switch (shader) { +   case PIPE_SHADER_VERTEX: +      /* +   case PIPE_SHADER_TESSELLATION_CONTROL: +   case PIPE_SHADER_TESSELLATION_EVALUATION: +      */ +   
case PIPE_SHADER_GEOMETRY: +   case PIPE_SHADER_FRAGMENT: +      break; +   default: +      return 0; +   } +    +   switch (param) { +   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: +   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: +   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: +   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: +      return 16384; +   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: +      return 4; +   case PIPE_SHADER_CAP_MAX_INPUTS: +      if (shader == PIPE_SHADER_VERTEX) +         return 32; +      return 0x300 / 16; +   case PIPE_SHADER_CAP_MAX_CONSTS: +      return 65536 / 16; +   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: +      return 14; +   case PIPE_SHADER_CAP_MAX_ADDRS: +      return 1; +   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: +   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: +      return shader != PIPE_SHADER_FRAGMENT; +   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: +   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: +      return 1; +   case PIPE_SHADER_CAP_MAX_PREDS: +      return 0; +   case PIPE_SHADER_CAP_MAX_TEMPS: +      return NVC0_CAP_MAX_PROGRAM_TEMPS; +   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: +      return 1; +   default: +      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); +      return 0; +   } +} + +static float +nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param) +{ +   switch (param) { +   case PIPE_CAP_MAX_LINE_WIDTH: +   case PIPE_CAP_MAX_LINE_WIDTH_AA: +      return 10.0f; +   case PIPE_CAP_MAX_POINT_WIDTH: +   case PIPE_CAP_MAX_POINT_WIDTH_AA: +      return 64.0f; +   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +      return 16.0f; +   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +      return 4.0f; +   default: +      NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); +      return 0.0f; +   } +} + +static void +nvc0_screen_destroy(struct pipe_screen *pscreen) +{ +   struct nvc0_screen *screen = nvc0_screen(pscreen); + +   nouveau_bo_ref(NULL, &screen->text); +   nouveau_bo_ref(NULL, &screen->tls); +   nouveau_bo_ref(NULL, 
&screen->txc); +   nouveau_bo_ref(NULL, &screen->fence.bo); +   nouveau_bo_ref(NULL, &screen->mp_stack_bo); + +   nouveau_resource_destroy(&screen->text_heap); + +   if (screen->tic.entries) +      FREE(screen->tic.entries); + +   nouveau_grobj_free(&screen->fermi); +   nouveau_grobj_free(&screen->eng2d); +   nouveau_grobj_free(&screen->m2mf); + +   nouveau_screen_fini(&screen->base); + +   FREE(screen); +} + +static int +nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, +                     unsigned size, const uint32_t *data) +{ +   struct nouveau_channel *chan = screen->base.channel; + +   size /= 4; + +   BEGIN_RING(chan, RING_3D_(NVC0_GRAPH_MACRO_ID), 2); +   OUT_RING  (chan, (m - 0x3800) / 8); +   OUT_RING  (chan, pos); +   BEGIN_RING_1I(chan, RING_3D_(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); +   OUT_RING  (chan, pos); +   OUT_RINGp (chan, data, size); + +   return pos + size; +} + +static void +nvc0_screen_fence_reference(struct pipe_screen *pscreen, +                            struct pipe_fence_handle **ptr, +                            struct pipe_fence_handle *fence) +{ +   nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence)); +} + +static int +nvc0_screen_fence_signalled(struct pipe_screen *pscreen, +                            struct pipe_fence_handle *fence, +                            unsigned flags) +{ +   return !(((struct nvc0_fence *)fence)->state == NVC0_FENCE_STATE_SIGNALLED); +} + +static int +nvc0_screen_fence_finish(struct pipe_screen *pscreen, +                         struct pipe_fence_handle *fence, +                         unsigned flags) +{ +   return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE; +} + +static void +nvc0_magic_3d_init(struct nouveau_channel *chan) +{ +   BEGIN_RING(chan, RING_3D_(0x10cc), 1); +   OUT_RING  (chan, 0xff); +   BEGIN_RING(chan, RING_3D_(0x10e0), 2); +   OUT_RING(chan, 0xff); +   OUT_RING(chan, 0xff); +   BEGIN_RING(chan, RING_3D_(0x10ec), 2); +   
OUT_RING(chan, 0xff); +   OUT_RING(chan, 0xff); +   BEGIN_RING(chan, RING_3D_(0x074c), 1); +   OUT_RING  (chan, 0x3f); + +   BEGIN_RING(chan, RING_3D_(0x10f8), 1); +   OUT_RING  (chan, 0x0101); + +   BEGIN_RING(chan, RING_3D_(0x16a8), 1); +   OUT_RING  (chan, (3 << 16) | 3); +   BEGIN_RING(chan, RING_3D_(0x1794), 1); +   OUT_RING  (chan, (2 << 16) | 2); +   BEGIN_RING(chan, RING_3D_(0x0de8), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D_(0x165c), 1); +   OUT_RING  (chan, 0); + +   BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */ +   OUT_RING  (chan, 0); + +   BEGIN_RING(chan, RING_3D_(0x12ac), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D_(0x0218), 1); +   OUT_RING  (chan, 0x10); +   BEGIN_RING(chan, RING_3D_(0x10fc), 1); +   OUT_RING  (chan, 0x10); +   BEGIN_RING(chan, RING_3D_(0x1290), 1); +   OUT_RING  (chan, 0x10); +   BEGIN_RING(chan, RING_3D_(0x12d8), 2); +   OUT_RING  (chan, 0x10); +   OUT_RING  (chan, 0x10); +   BEGIN_RING(chan, RING_3D_(0x06d4), 1); +   OUT_RING  (chan, 8); +   BEGIN_RING(chan, RING_3D_(0x1140), 1); +   OUT_RING  (chan, 0x10); +   BEGIN_RING(chan, RING_3D_(0x1610), 1); +   OUT_RING  (chan, 0xe); + +   BEGIN_RING(chan, RING_3D_(0x164c), 1); +   OUT_RING  (chan, 1 << 12); +   BEGIN_RING(chan, RING_3D_(0x151c), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D_(0x020c), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D_(0x030c), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D_(0x0300), 1); +   OUT_RING  (chan, 3); +   BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */ +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D_(0x02d0), 1); +   OUT_RING  (chan, 0x1f40); +   BEGIN_RING(chan, RING_3D_(0x00fdc), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D_(0x19c0), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D_(0x075c), 1); +   OUT_RING  (chan, 3); + +   BEGIN_RING(chan, RING_3D_(0x0fac), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D_(0x0f90), 1); +   
OUT_RING  (chan, 0); +} + +#define FAIL_SCREEN_INIT(str, err)                    \ +   do {                                               \ +      NOUVEAU_ERR(str, err);                          \ +      nvc0_screen_destroy(pscreen);                   \ +      return NULL;                                    \ +   } while(0) + +struct pipe_screen * +nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) +{ +   struct nvc0_screen *screen; +   struct nouveau_channel *chan; +   struct pipe_screen *pscreen; +   int ret; +   unsigned i; + +   screen = CALLOC_STRUCT(nvc0_screen); +   if (!screen) +      return NULL; +   pscreen = &screen->base.base; + +   ret = nouveau_screen_init(&screen->base, dev); +   if (ret) { +      nvc0_screen_destroy(pscreen); +      return NULL; +   } +   chan = screen->base.channel; + +   pscreen->winsys = ws; +   pscreen->destroy = nvc0_screen_destroy; +   pscreen->context_create = nvc0_create; +   pscreen->is_format_supported = nvc0_screen_is_format_supported; +   pscreen->get_param = nvc0_screen_get_param; +   pscreen->get_shader_param = nvc0_screen_get_shader_param; +   pscreen->get_paramf = nvc0_screen_get_paramf; +   pscreen->fence_reference = nvc0_screen_fence_reference; +   pscreen->fence_signalled = nvc0_screen_fence_signalled; +   pscreen->fence_finish = nvc0_screen_fence_finish; + +   nvc0_screen_init_resource_functions(pscreen); + +   screen->base.vertex_buffer_flags = NOUVEAU_BO_GART; +   screen->base.index_buffer_flags = 0; + +   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, 4096, &screen->fence.bo); +   if (ret) +      goto fail; +   nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR); +   screen->fence.map = screen->fence.bo->map; +   nouveau_bo_unmap(screen->fence.bo); + +   for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) { +      ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE, +                           &screen->scratch.bo[i]); +      if (ret) +         goto fail; +   } + +   for (i = 0; i < 8; 
++i) { +      BEGIN_RING(chan, (i << 13) | (0x0000 >> 2), 1); +      OUT_RING  (chan, 0x0000); +   } + +   ret = nouveau_grobj_alloc(chan, 0xbeef9039, NVC0_M2MF, &screen->m2mf); +   if (ret) +      FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); + +   BIND_RING (chan, screen->m2mf, NVC0_SUBCH_MF); +   BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3); +   OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); +   OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); +   OUT_RING  (chan, 0); + +   ret = nouveau_grobj_alloc(chan, 0xbeef902d, NVC0_2D, &screen->eng2d); +   if (ret) +      FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret); + +   BIND_RING (chan, screen->eng2d, NVC0_SUBCH_2D); +   BEGIN_RING(chan, RING_2D(OPERATION), 1); +   OUT_RING  (chan, NVC0_2D_OPERATION_SRCCOPY); +   BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_2D_(0x0884), 1); +   OUT_RING  (chan, 0x3f); +   BEGIN_RING(chan, RING_2D_(0x0888), 1); +   OUT_RING  (chan, 1); + +   ret = nouveau_grobj_alloc(chan, 0xbeef9097, NVC0_3D, &screen->fermi); +   if (ret) +      FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); + +   BIND_RING (chan, screen->fermi, NVC0_SUBCH_3D); +   BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3); +   OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); +   OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); +   OUT_RING  (chan, 0); + +   BEGIN_RING(chan, RING_3D(COND_MODE), 1); +   OUT_RING  (chan, NVC0_3D_COND_MODE_ALWAYS); + +   BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); +   OUT_RING  (chan, 1); + +   BEGIN_RING(chan, RING_3D(MULTISAMPLE_ZETA_ENABLE), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D(MULTISAMPLE_COLOR_ENABLE), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, 
RING_3D(MULTISAMPLE_MODE), 1); +   OUT_RING  (chan, NVC0_3D_MULTISAMPLE_MODE_1X); +   BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); +   OUT_RING  (chan, 0); + +   nvc0_magic_3d_init(chan); + +   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text); +   if (ret) +      goto fail; +   /* nouveau_bo_pin(dev, screen->text); */ + +   nouveau_resource_init(&screen->text_heap, 0, 1 << 20); + +   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, +                        &screen->uniforms); +   if (ret) +      goto fail; + +   /* auxiliary constants (6 user clip planes, base instance id) */ +   BEGIN_RING(chan, RING_3D(CB_SIZE), 3); +   OUT_RING  (chan, 256); +   OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +   OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +   for (i = 0; i < 5; ++i) { +      BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1); +      OUT_RING  (chan, (15 << 4) | 1); +   } + +   screen->tls_size = 4 * 4 * 32 * 128 * 4; +   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, +                        screen->tls_size, &screen->tls); +   if (ret) +      goto fail; + +   BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2); +   OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +   OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +   BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4); +   OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); +   OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); +   OUT_RING  (chan, screen->tls_size >> 32); +   OUT_RING  (chan, screen->tls_size); +   BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1); +   OUT_RING  (chan, 0); + +   for (i = 0; i < 5; ++i) { +      BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1); +      OUT_RING  (chan, 0x54); +   } +   BEGIN_RING(chan, RING_3D(LINKED_TSC), 1); +   OUT_RING  (chan, 0); + +   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, +        
                &screen->mp_stack_bo); +   if (ret) +      goto fail; +   /* nouveau_bo_pin(dev, screen->mp_stack_bo); */ + +   BEGIN_RING(chan, RING_3D_(0x17bc), 3); +   OUT_RELOCh(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); +   OUT_RELOCl(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); +   OUT_RING  (chan, 1); + +   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc); +   if (ret) +      goto fail; +   /* nouveau_bo_pin(dev, screen->txc); */ + +   BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3); +   OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +   OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +   OUT_RING  (chan, NVC0_TIC_MAX_ENTRIES - 1); + +   BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3); +   OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +   OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +   OUT_RING  (chan, NVC0_TSC_MAX_ENTRIES - 1); + +   BEGIN_RING(chan, RING_3D(Y_ORIGIN_BOTTOM), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2); +   OUT_RING  (chan, 0); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */ +   OUT_RING  (chan, 0x3f); + +   BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); +   OUT_RING  (chan, 0); + +   BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); +   OUT_RINGf (chan, 0.0f); +   OUT_RINGf (chan, 1.0f); + +   /* We use scissors instead of exact view volume clipping, +    * so they're always enabled. 
+    */ +   BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3); +   OUT_RING  (chan, 1); +   OUT_RING  (chan, 8192 << 16); +   OUT_RING  (chan, 8192 << 16); + +   BEGIN_RING(chan, RING_3D_(0x0fac), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D_(0x3484), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D_(0x0dbc), 1); +   OUT_RING  (chan, 0x00010000); +   BEGIN_RING(chan, RING_3D_(0x0dd8), 1); +   OUT_RING  (chan, 0xff800006); +   BEGIN_RING(chan, RING_3D_(0x3488), 1); +   OUT_RING  (chan, 0); + +#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); + +   i = 0; +   MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables); +   MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select); +   MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select); +   MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select); +   MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front); +   MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back); +   MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc); + +   BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D(GP_SELECT), 1); +   OUT_RING  (chan, 0x40); +   BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); +   OUT_RING  (chan, 0x30); +   BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1); +   OUT_RING  (chan, 3); +   BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1); +   OUT_RING  (chan, 0x20); +   BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1); +   OUT_RING  (chan, 0x00); + +   BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1); +   OUT_RING  (chan, 0); + +   BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); +   OUT_RING  (chan, 0x11111111); +   BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1); +   OUT_RING  (chan, 1); + +   BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); +   OUT_RING  (chan, 0xab); +   OUT_RING  (chan, 0x00000000); + +   FIRE_RING (chan); + +   
screen->tic.entries = CALLOC(4096, sizeof(void *)); +   screen->tsc.entries = screen->tic.entries + 2048; + +   screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, +                                    0x000); +   screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000); +   screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + +   nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); + +   return pscreen; + +fail: +   nvc0_screen_destroy(pscreen); +   return NULL; +} + +void +nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) +{ +   struct nouveau_channel *chan = screen->base.channel; + +   const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + +   nouveau_reloc_emit(chan, NULL, 0, NULL, screen->text, 0, 0, flags, 0, 0); +   nouveau_reloc_emit(chan, NULL, 0, NULL, screen->txc, 0, 0, flags, 0, 0); +   nouveau_reloc_emit(chan, NULL, 0, NULL, screen->tls, 0, 0, flags, 0, 0); +} + +int +nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry) +{ +   int i = screen->tic.next; + +   while (screen->tic.lock[i / 32] & (1 << (i % 32))) +      i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + +   screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + +   if (screen->tic.entries[i]) +      nvc0_tic_entry(screen->tic.entries[i])->id = -1; + +   screen->tic.entries[i] = entry; +   return i; +} + +int +nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry) +{ +   int i = screen->tsc.next; + +   while (screen->tsc.lock[i / 32] & (1 << (i % 32))) +      i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); + +   screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); + +   if (screen->tsc.entries[i]) +      nvc0_tsc_entry(screen->tsc.entries[i])->id = -1; + +   screen->tsc.entries[i] = entry; +   return i; +} diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h new file mode 100644 index 0000000000..8aa77da2bf --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -0,0 
+1,175 @@ +#ifndef __NVC0_SCREEN_H__ +#define __NVC0_SCREEN_H__ + +#define NOUVEAU_NVC0 +#include "nouveau/nouveau_screen.h" +#undef NOUVEAU_NVC0 +#include "nvc0_winsys.h" +#include "nvc0_stateobj.h" + +#define NVC0_TIC_MAX_ENTRIES 2048 +#define NVC0_TSC_MAX_ENTRIES 2048 + +struct nvc0_mman; +struct nvc0_context; +struct nvc0_fence; + +#define NVC0_SCRATCH_SIZE (2 << 20) +#define NVC0_SCRATCH_NR_BUFFERS 2 + +struct nvc0_screen { +   struct nouveau_screen base; +   struct nouveau_winsys *nvws; + +   struct nvc0_context *cur_ctx; + +   struct nouveau_bo *text; +   struct nouveau_bo *uniforms; +   struct nouveau_bo *tls; +   struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ +   struct nouveau_bo *mp_stack_bo; + +   uint64_t tls_size; + +   struct nouveau_resource *text_heap; + +   struct { +      struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS]; +      uint8_t *buf; +      int index; +      uint32_t offset; +   } scratch; + +   struct { +      void **entries; +      int next; +      uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; +   } tic; +    +   struct { +      void **entries; +      int next; +      uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32]; +   } tsc; + +   struct { +      uint32_t *map; +      struct nvc0_fence *head; +      struct nvc0_fence *tail; +      struct nvc0_fence *current; +      uint32_t sequence; +      uint32_t sequence_ack; +      struct nouveau_bo *bo; +   } fence; + +   struct nvc0_mman *mm_GART; +   struct nvc0_mman *mm_VRAM; +   struct nvc0_mman *mm_VRAM_fe0; + +   struct nouveau_grobj *fermi; +   struct nouveau_grobj *eng2d; +   struct nouveau_grobj *m2mf; +}; + +static INLINE struct nvc0_screen * +nvc0_screen(struct pipe_screen *screen) +{ +   return (struct nvc0_screen *)screen; +} + +/* Since a resource can be migrated, we need to decouple allocations from + * them. This struct is linked with fences for delayed freeing of allocs. 
+ */ +struct nvc0_mm_allocation { +   struct nvc0_mm_allocation *next; +   void *priv; +   uint32_t offset; +}; + +extern struct nvc0_mman * +nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type); + +extern struct nvc0_mm_allocation * +nvc0_mm_allocate(struct nvc0_mman *, +                 uint32_t size, struct nouveau_bo **, uint32_t *offset); +extern void +nvc0_mm_free(struct nvc0_mm_allocation *); + +void nvc0_screen_make_buffers_resident(struct nvc0_screen *); + +int nvc0_screen_tic_alloc(struct nvc0_screen *, void *); +int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); + +static INLINE void +nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) +{ +   struct nvc0_screen *screen = nvc0_screen(res->base.screen); + +   assert(res->mm); + +   nvc0_fence_reference(&res->fence, screen->fence.current); + +   if (flags & NOUVEAU_BO_WR) +      nvc0_fence_reference(&res->fence_wr, screen->fence.current); + +   nouveau_reloc_emit(screen->base.channel, +                      NULL, 0, NULL, res->bo, 0, 0, NOUVEAU_BO_RDWR, 0, 0); +} + + +boolean +nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); + +void +nvc0_screen_fence_next(struct nvc0_screen *); + +static INLINE boolean +nvc0_screen_fence_emit(struct nvc0_screen *screen) +{ +   nvc0_fence_emit(screen->fence.current); + +   return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +} + +struct nvc0_format { +   uint32_t rt; +   uint32_t tic; +   uint32_t vtx; +   uint32_t usage; +}; + +extern const struct nvc0_format nvc0_format_table[]; + +static INLINE void +nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nvc0_tic_entry *tic) +{ +   if (tic->id >= 0) +      screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +} + +static INLINE void +nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc) +{ +   if (tsc->id >= 0) +      screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +} + +static INLINE 
void +nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic) +{ +   if (tic->id >= 0) { +      screen->tic.entries[tic->id] = NULL; +      screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +   } +} + +static INLINE void +nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc) +{ +   if (tsc->id >= 0) { +      screen->tsc.entries[tsc->id] = NULL; +      screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +   } +} + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c new file mode 100644 index 0000000000..a6595c5610 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -0,0 +1,180 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "nvc0_context.h" + +static boolean +nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ +   int ret; +   unsigned size; + +   if (prog->translated) +      return TRUE; + +   prog->translated = nvc0_program_translate(prog); +   if (!prog->translated) +      return FALSE; + +   size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100); + +   ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog, +                                &prog->res); +   if (ret) +      return FALSE; + +   prog->code_base = prog->res->start; + +   nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM, +                         prog->code_base, NVC0_SHADER_HEADER_SIZE, prog->hdr); +   nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM, +                         prog->code_base + NVC0_SHADER_HEADER_SIZE, +                         prog->code_size, prog->code); + +   BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1); +   OUT_RING  (nvc0->screen->base.channel, 0x1111); + +   return TRUE; +} + +void +nvc0_vertprog_validate(struct nvc0_context *nvc0) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   struct nvc0_program *vp = nvc0->vertprog; + +   if (nvc0->clip.nr > vp->vp.num_ucps) { +      assert(nvc0->clip.nr <= 6); +      vp->vp.num_ucps = 6; + +      if (vp->translated) +         nvc0_program_destroy(nvc0, vp); +   } + +   if (!nvc0_program_validate(nvc0, vp)) +         return; + +   BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2); +   OUT_RING  (chan, 0x11); +   OUT_RING  (chan, vp->code_base); +   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1); +   OUT_RING  (chan, vp->max_gpr); + +   // BEGIN_RING(chan, RING_3D_(0x163c), 1); +   // OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1); +   OUT_RING  (chan, 1); +} + +void +nvc0_fragprog_validate(struct 
nvc0_context *nvc0) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   struct nvc0_program *fp = nvc0->fragprog; + +   if (!nvc0_program_validate(nvc0, fp)) +         return; + +   BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1); +   OUT_RING  (chan, 0); +   BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2); +   OUT_RING  (chan, 0x51); +   OUT_RING  (chan, fp->code_base); +   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1); +   OUT_RING  (chan, fp->max_gpr); + +   BEGIN_RING(chan, RING_3D_(0x0360), 2); +   OUT_RING  (chan, 0x20164010); +   OUT_RING  (chan, 0x20); +   BEGIN_RING(chan, RING_3D_(0x196c), 1); +   OUT_RING  (chan, fp->flags[0]); +} + +void +nvc0_tctlprog_validate(struct nvc0_context *nvc0) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   struct nvc0_program *tp = nvc0->tctlprog; + +   if (!tp) { +      BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1); +      OUT_RING  (chan, 0x20); +      return; +   } +   if (!nvc0_program_validate(nvc0, tp)) +         return; + +   BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2); +   OUT_RING  (chan, 0x21); +   OUT_RING  (chan, tp->code_base); +   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1); +   OUT_RING  (chan, tp->max_gpr);    +} + +void +nvc0_tevlprog_validate(struct nvc0_context *nvc0) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   struct nvc0_program *tp = nvc0->tevlprog; + +   if (!tp) { +      BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); +      OUT_RING  (chan, 0x30); +      return; +   } +   if (!nvc0_program_validate(nvc0, tp)) +         return; + +   BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); +   OUT_RING  (chan, 0x31); +   BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1); +   OUT_RING  (chan, tp->code_base); +   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1); +   OUT_RING  (chan, tp->max_gpr);    +} + +void +nvc0_gmtyprog_validate(struct nvc0_context *nvc0) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   struct nvc0_program *gp = nvc0->gmtyprog; + 
+   if (!gp) { +      BEGIN_RING(chan, RING_3D(GP_SELECT), 1); +      OUT_RING  (chan, 0x40); +      return; +   } +   if (!nvc0_program_validate(nvc0, gp)) +         return; + +   BEGIN_RING(chan, RING_3D(GP_SELECT), 1); +   OUT_RING  (chan, 0x41); +   BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1); +   OUT_RING  (chan, gp->code_base); +   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1); +   OUT_RING  (chan, gp->max_gpr);    +} diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c new file mode 100644 index 0000000000..a462b543ad --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -0,0 +1,854 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvc0_stateobj.h" +#include "nvc0_context.h" + +#include "nvc0_3d.xml.h" +#include "nv50_texture.xml.h" + +#include "nouveau/nouveau_gldefs.h" + +static INLINE uint32_t +nvc0_colormask(unsigned mask) +{ +    uint32_t ret = 0; + +    if (mask & PIPE_MASK_R) +        ret |= 0x0001; +    if (mask & PIPE_MASK_G) +        ret |= 0x0010; +    if (mask & PIPE_MASK_B) +        ret |= 0x0100; +    if (mask & PIPE_MASK_A) +        ret |= 0x1000; + +    return ret; +} + +static INLINE uint32_t +nvc0_blend_fac(unsigned factor) +{ +    static const uint16_t bf[] = { +        NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */ +        NV50_3D_BLEND_FACTOR_ONE, +        NV50_3D_BLEND_FACTOR_SRC_COLOR, +        NV50_3D_BLEND_FACTOR_SRC_ALPHA, +        NV50_3D_BLEND_FACTOR_DST_ALPHA, +        NV50_3D_BLEND_FACTOR_DST_COLOR, +        NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE, +        NV50_3D_BLEND_FACTOR_CONSTANT_COLOR, +        NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA, +        NV50_3D_BLEND_FACTOR_SRC1_COLOR, +        NV50_3D_BLEND_FACTOR_SRC1_ALPHA, +        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */ +        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */ +        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */ +        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */ +        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */ +        NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */ +        NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */ +        NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, +        NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, +        NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, +        NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR, +        NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, +        NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, +        NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR, +        NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA +    }; + +    assert(factor < (sizeof(bf) / sizeof(bf[0]))); +    return bf[factor]; +} + +static void * 
+nvc0_blend_state_create(struct pipe_context *pipe, +                        const struct pipe_blend_state *cso) +{ +    struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj); +    int i; + +    so->pipe = *cso; + +    SB_OUT_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable); + +    if (!cso->independent_blend_enable) { +        SB_BEGIN_3D(so, BLEND_ENABLES, 1); +        SB_DATA    (so, cso->rt[0].blend_enable ? 0xff : 0); + +        if (cso->rt[0].blend_enable) { +            SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); +            SB_DATA    (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); +            SB_DATA    (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor)); +            SB_DATA    (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor)); +            SB_DATA    (so, nvgl_blend_eqn(cso->rt[0].alpha_func)); +            SB_DATA    (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor)); +            SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); +            SB_DATA    (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor)); +        } + +        SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1); +        SB_DATA    (so, nvc0_colormask(cso->rt[0].colormask)); +    } else { +        uint8_t en = 0; + +        for (i = 0; i < 8; ++i) { +            if (!cso->rt[i].blend_enable) +                continue; +            en |= 1 << i; + +            SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6); +            SB_DATA    (so, nvgl_blend_eqn(cso->rt[i].rgb_func)); +            SB_DATA    (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor)); +            SB_DATA    (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor)); +            SB_DATA    (so, nvgl_blend_eqn(cso->rt[i].alpha_func)); +            SB_DATA    (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor)); +            SB_DATA    (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor)); +        } +        SB_BEGIN_3D(so, BLEND_ENABLES, 1); +        SB_DATA    (so, en); + +        SB_BEGIN_3D(so, COLOR_MASK(0), 8); +        for (i = 0; i < 8; ++i) +            
SB_DATA(so, nvc0_colormask(cso->rt[i].colormask)); +    } + +    if (cso->logicop_enable) { +       SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); +       SB_DATA    (so, 1); +       SB_DATA    (so, nvgl_logicop_func(cso->logicop_func)); +    } else { +       SB_OUT_3D  (so, LOGIC_OP_ENABLE, 0); +    } + +    assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); +    return so; +} + +static void +nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ +    struct nvc0_context *nvc0 = nvc0_context(pipe); + +    nvc0->blend = hwcso; +    nvc0->dirty |= NVC0_NEW_BLEND; +} + +static void +nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +    FREE(hwcso); +} + +static void * +nvc0_rasterizer_state_create(struct pipe_context *pipe, +                             const struct pipe_rasterizer_state *cso) +{ +    struct nvc0_rasterizer_stateobj *so; + +    so = CALLOC_STRUCT(nvc0_rasterizer_stateobj); +    if (!so) +        return NULL; +    so->pipe = *cso; + +#ifndef NVC0_SCISSORS_CLIPPING +    SB_OUT_3D  (so, SCISSOR_ENABLE(0), cso->scissor); +#endif +     +    SB_BEGIN_3D(so, SHADE_MODEL, 1); +    SB_DATA    (so, cso->flatshade ? 
NVC0_3D_SHADE_MODEL_FLAT : +                                     NVC0_3D_SHADE_MODEL_SMOOTH); +    SB_OUT_3D  (so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); +    SB_OUT_3D  (so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); + +    SB_BEGIN_3D(so, LINE_WIDTH, 1); +    SB_DATA    (so, fui(cso->line_width)); +    SB_OUT_3D  (so, LINE_SMOOTH_ENABLE, cso->line_smooth); + +    SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1); +    if (cso->line_stipple_enable) { +        SB_DATA    (so, 1); +        SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1); +        SB_DATA    (so, (cso->line_stipple_pattern << 8) | +                         cso->line_stipple_factor); +                     +    } else { +        SB_DATA    (so, 0); +    } + +    SB_OUT_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex); +    if (!cso->point_size_per_vertex) { +       SB_BEGIN_3D(so, POINT_SIZE, 1); +       SB_DATA    (so, fui(cso->point_size)); +    } +    SB_OUT_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization); + +    SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1); +    SB_DATA    (so, nvgl_polygon_mode(cso->fill_front)); +    SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1); +    SB_DATA    (so, nvgl_polygon_mode(cso->fill_back)); +    SB_OUT_3D  (so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth); + +    SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); +    SB_DATA    (so, cso->cull_face != PIPE_FACE_NONE); +    SB_DATA    (so, cso->front_ccw ? 
NVC0_3D_FRONT_FACE_CCW : +                                     NVC0_3D_FRONT_FACE_CW); +    switch (cso->cull_face) { +    case PIPE_FACE_FRONT_AND_BACK: +       SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK); +       break; +    case PIPE_FACE_FRONT: +       SB_DATA(so, NVC0_3D_CULL_FACE_FRONT); +       break; +    case PIPE_FACE_BACK: +    default: +       SB_DATA(so, NVC0_3D_CULL_FACE_BACK); +       break; +    } + +    SB_OUT_3D  (so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable); +    SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3); +    SB_DATA    (so, cso->offset_point); +    SB_DATA    (so, cso->offset_line); +    SB_DATA    (so, cso->offset_tri); + +    if (cso->offset_point || cso->offset_line || cso->offset_tri) { +        SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); +        SB_DATA    (so, fui(cso->offset_scale)); +        SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); +        SB_DATA    (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */ +    } + +    assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); +    return (void *)so; +} + +static void +nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ +   struct nvc0_context *nvc0 = nvc0_context(pipe); + +   nvc0->rast = hwcso; +   nvc0->dirty |= NVC0_NEW_RASTERIZER; +} + +static void +nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +   FREE(hwcso); +} + +static void * +nvc0_zsa_state_create(struct pipe_context *pipe, +                      const struct pipe_depth_stencil_alpha_state *cso) +{ +   struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj); + +   so->pipe = *cso; + +   SB_OUT_3D  (so, DEPTH_WRITE_ENABLE, cso->depth.writemask); +   SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1); +   if (cso->depth.enabled) { +      SB_DATA    (so, 1); +      SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); +      SB_DATA    (so, nvgl_comparison_op(cso->depth.func)); +   } else { +      SB_DATA    (so, 0); +   } + +   if (cso->stencil[0].enabled) { +      SB_BEGIN_3D(so, 
STENCIL_FRONT_ENABLE, 5); +      SB_DATA    (so, 1); +      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].fail_op)); +      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); +      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); +      SB_DATA    (so, nvgl_comparison_op(cso->stencil[0].func)); +      SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); +      SB_DATA    (so, cso->stencil[0].writemask); +      SB_DATA    (so, cso->stencil[0].valuemask); +   } else { +      SB_OUT_3D  (so, STENCIL_FRONT_ENABLE, 0); +   } + +   if (cso->stencil[1].enabled) { +      SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); +      SB_DATA    (so, 1); +      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].fail_op)); +      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); +      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); +      SB_DATA    (so, nvgl_comparison_op(cso->stencil[1].func)); +      SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); +      SB_DATA    (so, cso->stencil[1].writemask); +      SB_DATA    (so, cso->stencil[1].valuemask); +   } else { +      SB_OUT_3D  (so, STENCIL_TWO_SIDE_ENABLE, 0); +   } +     +   SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1); +   if (cso->alpha.enabled) { +      SB_DATA    (so, 1); +      SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); +      SB_DATA    (so, fui(cso->alpha.ref_value)); +      SB_DATA    (so, nvgl_comparison_op(cso->alpha.func)); +   } else { +      SB_DATA    (so, 0); +   } + +   assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); +   return (void *)so; +} + +static void +nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso) +{ +   struct nvc0_context *nvc0 = nvc0_context(pipe); + +   nvc0->zsa = hwcso; +   nvc0->dirty |= NVC0_NEW_ZSA; +} + +static void +nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso) +{ +   FREE(hwcso); +} + +/* ====================== SAMPLERS AND TEXTURES ================================ + */ + +#define NV50_TSC_WRAP_CASE(n) \ +    case PIPE_TEX_WRAP_##n: return 
NV50_TSC_WRAP_##n + +static INLINE unsigned +nv50_tsc_wrap_mode(unsigned wrap) +{ +   switch (wrap) { +   NV50_TSC_WRAP_CASE(REPEAT); +   NV50_TSC_WRAP_CASE(MIRROR_REPEAT); +   NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); +   NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); +   NV50_TSC_WRAP_CASE(CLAMP); +   NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); +   NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); +   NV50_TSC_WRAP_CASE(MIRROR_CLAMP); +   default: +       NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); +       return NV50_TSC_WRAP_REPEAT; +   } +} + +static void * +nvc0_sampler_state_create(struct pipe_context *pipe, +                          const struct pipe_sampler_state *cso) +{ +   struct nvc0_tsc_entry *so = CALLOC_STRUCT(nvc0_tsc_entry); +   float f[2]; + +   so->id = -1; + +   so->tsc[0] = (0x00026000 | +                 (nv50_tsc_wrap_mode(cso->wrap_s) << 0) | +                 (nv50_tsc_wrap_mode(cso->wrap_t) << 3) | +                 (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); + +   switch (cso->mag_img_filter) { +   case PIPE_TEX_FILTER_LINEAR: +      so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR; +      break; +   case PIPE_TEX_FILTER_NEAREST: +   default: +      so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST; +      break; +   } + +   switch (cso->min_img_filter) { +   case PIPE_TEX_FILTER_LINEAR: +      so->tsc[1] |= NV50_TSC_1_MINF_LINEAR; +      break; +   case PIPE_TEX_FILTER_NEAREST: +   default: +      so->tsc[1] |= NV50_TSC_1_MINF_NEAREST; +      break; +   } + +   switch (cso->min_mip_filter) { +   case PIPE_TEX_MIPFILTER_LINEAR: +      so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR; +      break; +   case PIPE_TEX_MIPFILTER_NEAREST: +      so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST; +      break; +   case PIPE_TEX_MIPFILTER_NONE: +   default: +      so->tsc[1] |= NV50_TSC_1_MIPF_NONE; +      break; +   } + +   if (cso->max_anisotropy >= 16) +      so->tsc[0] |= (7 << 20); +   else +   if (cso->max_anisotropy >= 12) +      so->tsc[0] |= (6 << 20); +   else { +      so->tsc[0] |= (cso->max_anisotropy >> 
1) << 20; + +      if (cso->max_anisotropy >= 4) +         so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35; +      else +      if (cso->max_anisotropy >= 2) +         so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15; +   } + +   if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +      /* NOTE: must be deactivated for non-shadow textures */ +      so->tsc[0] |= (1 << 9); +      so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10; +   } + +   f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f); +   so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12; + +   f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f); +   f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f); +   so->tsc[2] |= +      (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff); + +   so->tsc[4] = fui(cso->border_color[0]); +   so->tsc[5] = fui(cso->border_color[1]); +   so->tsc[6] = fui(cso->border_color[2]); +   so->tsc[7] = fui(cso->border_color[3]); + +   return (void *)so; +} + +static void +nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ +   unsigned s, i; + +   for (s = 0; s < 5; ++s) +      for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i) +         if (nvc0_context(pipe)->samplers[s][i] == hwcso) +            nvc0_context(pipe)->samplers[s][i] = NULL; + +   nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nvc0_tsc_entry(hwcso)); + +   FREE(hwcso); +} + +static INLINE void +nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s, +                               unsigned nr, void **hwcso) +{ +   unsigned i; + +   for (i = 0; i < nr; ++i) { +      struct nvc0_tsc_entry *old = nvc0->samplers[s][i]; + +      nvc0->samplers[s][i] = nvc0_tsc_entry(hwcso[i]); +      if (old) +         nvc0_screen_tsc_unlock(nvc0->screen, old); +   } +   for (; i < nvc0->num_samplers[s]; ++i) +      if (nvc0->samplers[s][i]) +         nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + +   nvc0->num_samplers[s] = nr; + +   nvc0->dirty |= NVC0_NEW_SAMPLERS; +} + +static void 
+nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ +   nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s); +} + +static void +nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ +   nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s); +} + +static void +nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ +   nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s); +} + +/* NOTE: only called when not referenced anywhere, won't be bound */ +static void +nvc0_sampler_view_destroy(struct pipe_context *pipe, +                          struct pipe_sampler_view *view) +{ +   pipe_resource_reference(&view->texture, NULL); + +   nvc0_screen_tic_free(nvc0_context(pipe)->screen, nvc0_tic_entry(view)); + +   FREE(nvc0_tic_entry(view)); +} + +static INLINE void +nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, +                             unsigned nr, +                             struct pipe_sampler_view **views) +{ +   unsigned i; + +   for (i = 0; i < nr; ++i) { +      struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]); +      if (old) +         nvc0_screen_tic_unlock(nvc0->screen, old); + +      pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]); +   } + +   for (i = nr; i < nvc0->num_textures[s]; ++i) { +      struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]); +      if (!old) +         continue; +      nvc0_screen_tic_unlock(nvc0->screen, old); + +      pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); +   } + +   nvc0->num_textures[s] = nr; + +   nvc0->dirty |= NVC0_NEW_TEXTURES; +} + +static void +nvc0_vp_set_sampler_views(struct pipe_context *pipe, +                          unsigned nr, +                          struct pipe_sampler_view **views) +{ +   nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views); +} + +static void +nvc0_fp_set_sampler_views(struct pipe_context *pipe, +       
                   unsigned nr, +                          struct pipe_sampler_view **views) +{ +   nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views); +} + +static void +nvc0_gp_set_sampler_views(struct pipe_context *pipe, +                          unsigned nr, +                          struct pipe_sampler_view **views) +{ +   nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views); +} + +/* ============================= SHADERS ======================================= + */ + +static void * +nvc0_sp_state_create(struct pipe_context *pipe, +                     const struct pipe_shader_state *cso, unsigned type) +{ +   struct nvc0_program *prog; + +   prog = CALLOC_STRUCT(nvc0_program); +   if (!prog) +      return NULL; + +   prog->type = type; +   prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + +   return (void *)prog; +} + +static void +nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +   struct nvc0_program *prog = (struct nvc0_program *)hwcso; + +   nvc0_program_destroy(nvc0_context(pipe), prog); + +   FREE((void *)prog->pipe.tokens); +   FREE(prog); +} + +static void * +nvc0_vp_state_create(struct pipe_context *pipe, +                     const struct pipe_shader_state *cso) +{ +   return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); +} + +static void +nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +    struct nvc0_context *nvc0 = nvc0_context(pipe); + +    nvc0->vertprog = hwcso; +    nvc0->dirty |= NVC0_NEW_VERTPROG; +} + +static void * +nvc0_fp_state_create(struct pipe_context *pipe, +                     const struct pipe_shader_state *cso) +{ +   return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); +} + +static void +nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +    struct nvc0_context *nvc0 = nvc0_context(pipe); + +    nvc0->fragprog = hwcso; +    nvc0->dirty |= NVC0_NEW_FRAGPROG; +} + +static void * +nvc0_gp_state_create(struct pipe_context *pipe, +                   
  const struct pipe_shader_state *cso) +{ +   return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY); +} + +static void +nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +    struct nvc0_context *nvc0 = nvc0_context(pipe); + +    nvc0->gmtyprog = hwcso; +    nvc0->dirty |= NVC0_NEW_GMTYPROG; +} + +static void +nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, +                         struct pipe_resource *res) +{ +   struct nvc0_context *nvc0 = nvc0_context(pipe); + +   switch (shader) { +   case PIPE_SHADER_VERTEX: shader = 0; break; +      /* +   case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break; +   case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break; +      */ +   case PIPE_SHADER_GEOMETRY: shader = 3; break; +   case PIPE_SHADER_FRAGMENT: shader = 4; break; +   default: +      assert(0); +      break; +   } + +   if (nvc0->constbuf[shader][index]) +      nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT, +			       nvc0_resource( +				       nvc0->constbuf[shader][index])); + +   pipe_resource_reference(&nvc0->constbuf[shader][index], res); + +   nvc0->constbuf_dirty[shader] |= 1 << index; + +   nvc0->dirty |= NVC0_NEW_CONSTBUF; +} + +/* ============================================================================= + */ + +static void +nvc0_set_blend_color(struct pipe_context *pipe, +                     const struct pipe_blend_color *bcol) +{ +    struct nvc0_context *nvc0 = nvc0_context(pipe); + +    nvc0->blend_colour = *bcol; +    nvc0->dirty |= NVC0_NEW_BLEND_COLOUR; +} + +static void +nvc0_set_stencil_ref(struct pipe_context *pipe, +                     const struct pipe_stencil_ref *sr) +{ +    struct nvc0_context *nvc0 = nvc0_context(pipe); + +    nvc0->stencil_ref = *sr; +    nvc0->dirty |= NVC0_NEW_STENCIL_REF; +} + +static void +nvc0_set_clip_state(struct pipe_context *pipe, +                    const struct pipe_clip_state *clip) +{ +    struct nvc0_context *nvc0 = nvc0_context(pipe); + 
   const unsigned size = clip->nr * sizeof(clip->ucp[0]);
+
+    memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size);
+    nvc0->clip.nr = clip->nr;
+
+    nvc0->clip.depth_clamp = clip->depth_clamp;
+
+    nvc0->dirty |= NVC0_NEW_CLIP;
+}
+/*
+ * Store the sample mask in the context and flag NVC0_NEW_SAMPLE_MASK.
+ */
+static void
+nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->sample_mask = sample_mask;
+    nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
+}
+
+/*
+ * Copy the framebuffer state by value into the context and flag
+ * NVC0_NEW_FRAMEBUFFER.
+ * NOTE(review): this is a plain struct copy; no references are taken on the
+ * surfaces it points to here - confirm the caller keeps them alive.
+ */
+static void
+nvc0_set_framebuffer_state(struct pipe_context *pipe,
+                           const struct pipe_framebuffer_state *fb)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->framebuffer = *fb;
+    nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+/*
+ * Copy the polygon stipple pattern into the context and flag
+ * NVC0_NEW_STIPPLE.
+ */
+static void
+nvc0_set_polygon_stipple(struct pipe_context *pipe,
+                         const struct pipe_poly_stipple *stipple)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->stipple = *stipple;
+    nvc0->dirty |= NVC0_NEW_STIPPLE;
+}
+/*
+ * Copy the scissor rectangle into the context and flag NVC0_NEW_SCISSOR.
+ */
+static void
+nvc0_set_scissor_state(struct pipe_context *pipe,
+                       const struct pipe_scissor_state *scissor)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->scissor = *scissor;
+    nvc0->dirty |= NVC0_NEW_SCISSOR;
+}
+/*
+ * Copy the viewport transform into the context and flag NVC0_NEW_VIEWPORT.
+ */
+static void
+nvc0_set_viewport_state(struct pipe_context *pipe,
+                        const struct pipe_viewport_state *vpt)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->viewport = *vpt;
+    nvc0->dirty |= NVC0_NEW_VIEWPORT;
+}
+/*
+ * Copy `count` vertex buffer descriptors into nvc0->vtxbuf, record the count
+ * and flag NVC0_NEW_ARRAYS.
+ * NOTE(review): the copy is not bounds-checked against the size of
+ * nvc0->vtxbuf and takes no buffer references - confirm both are guaranteed
+ * by the caller.
+ */
+static void
+nvc0_set_vertex_buffers(struct pipe_context *pipe,
+                        unsigned count,
+                        const struct pipe_vertex_buffer *vb)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count);
+    nvc0->num_vtxbufs = count;
+
+    nvc0->dirty |= NVC0_NEW_ARRAYS;
+}
+/*
+ * Copy the index buffer descriptor into nvc0->idxbuf, or clear
+ * nvc0->idxbuf.buffer when ib is NULL.  No dirty bit is set here.
+ */
+static void
+nvc0_set_index_buffer(struct pipe_context *pipe,
+                      const struct 
pipe_index_buffer *ib)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    if (ib)
+        memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf));
+    else
+        nvc0->idxbuf.buffer = NULL;
+}
+/*
+ * Bind a vertex elements CSO: store it in nvc0->vertex and flag
+ * NVC0_NEW_VERTEX.
+ */
+static void
+nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->vertex = hwcso;
+    nvc0->dirty |= NVC0_NEW_VERTEX;
+}
+/*
+ * Fill in the state-related callbacks of nvc0->pipe: create/bind/delete for
+ * blend, rasterizer, depth-stencil-alpha, sampler, sampler view, shader and
+ * vertex element objects, plus the set_* entry points for parameter state.
+ */
+void
+nvc0_init_state_functions(struct nvc0_context *nvc0)
+{
+    nvc0->pipe.create_blend_state = nvc0_blend_state_create;
+    nvc0->pipe.bind_blend_state = nvc0_blend_state_bind;
+    nvc0->pipe.delete_blend_state = nvc0_blend_state_delete;
+
+    nvc0->pipe.create_rasterizer_state = nvc0_rasterizer_state_create;
+    nvc0->pipe.bind_rasterizer_state = nvc0_rasterizer_state_bind;
+    nvc0->pipe.delete_rasterizer_state = nvc0_rasterizer_state_delete;
+
+    nvc0->pipe.create_depth_stencil_alpha_state = nvc0_zsa_state_create;
+    nvc0->pipe.bind_depth_stencil_alpha_state = nvc0_zsa_state_bind;
+    nvc0->pipe.delete_depth_stencil_alpha_state = nvc0_zsa_state_delete;
+
+    nvc0->pipe.create_sampler_state = nvc0_sampler_state_create;
+    nvc0->pipe.delete_sampler_state = nvc0_sampler_state_delete;
+    nvc0->pipe.bind_vertex_sampler_states   = nvc0_vp_sampler_states_bind;
+    nvc0->pipe.bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
+    nvc0->pipe.bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
+
+    nvc0->pipe.create_sampler_view = nvc0_create_sampler_view;
+    nvc0->pipe.sampler_view_destroy = nvc0_sampler_view_destroy;
+    nvc0->pipe.set_vertex_sampler_views   = nvc0_vp_set_sampler_views;
+    nvc0->pipe.set_fragment_sampler_views = nvc0_fp_set_sampler_views;
+    nvc0->pipe.set_geometry_sampler_views = nvc0_gp_set_sampler_views;
+
+    nvc0->pipe.create_vs_state = nvc0_vp_state_create;
+    nvc0->pipe.create_fs_state = nvc0_fp_state_create;
+    nvc0->pipe.create_gs_state = nvc0_gp_state_create;
+    nvc0->pipe.bind_vs_state = nvc0_vp_state_bind;
+    nvc0->pipe.bind_fs_state = nvc0_fp_state_bind;
+    nvc0->pipe.bind_gs_state = nvc0_gp_state_bind;
+    nvc0->pipe.delete_vs_state = nvc0_sp_state_delete; /* one delete hook for all stages */
+    nvc0->pipe.delete_fs_state = nvc0_sp_state_delete;
+    nvc0->pipe.delete_gs_state = nvc0_sp_state_delete;
+
+    nvc0->pipe.set_blend_color = nvc0_set_blend_color;
+    nvc0->pipe.set_stencil_ref = nvc0_set_stencil_ref;
+    nvc0->pipe.set_clip_state = nvc0_set_clip_state;
+    nvc0->pipe.set_sample_mask = nvc0_set_sample_mask;
+    nvc0->pipe.set_constant_buffer = nvc0_set_constant_buffer;
+    nvc0->pipe.set_framebuffer_state = nvc0_set_framebuffer_state;
+    nvc0->pipe.set_polygon_stipple = nvc0_set_polygon_stipple;
+    nvc0->pipe.set_scissor_state = nvc0_set_scissor_state;
+    nvc0->pipe.set_viewport_state = nvc0_set_viewport_state;
+
+    nvc0->pipe.create_vertex_elements_state = nvc0_vertex_state_create;
+    nvc0->pipe.delete_vertex_elements_state = nvc0_vertex_state_delete;
+    nvc0->pipe.bind_vertex_elements_state = nvc0_vertex_state_bind;
+
+    nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers;
+    nvc0->pipe.set_index_buffer = nvc0_set_index_buffer;
+}
+
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
new file mode 100644
index 0000000000..a1419bb310
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -0,0 +1,423 @@
+
+#include "nvc0_context.h"
+#include "os/os_time.h"
+/*
+ * Emit ZCULL setup for the bound depth/stencil surface: two addresses placed
+ * after the miptree data (total_size aligned up to 1 << 17, then a further
+ * 1 << 17), a size of twice total_size, and dimensions with the width rounded
+ * up to a multiple of 224 and the height aligned to 32.
+ * NOTE(review): fb->zsbuf is dereferenced without a NULL check, and this
+ * function does not appear in validate_list - confirm it is meant to be
+ * unused for now.
+ */
+static void
+nvc0_validate_zcull(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+    struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture);
+    struct nouveau_bo *bo = mt->base.bo;
+    uint32_t size;
+    uint32_t offset = align(mt->total_size, 1 << 17);
+    unsigned width, height;
+
+    size = mt->total_size * 2;
+
+    height = align(fb->height, 32);
+    width = fb->width % 224;
+    if (width)
+  
     width = fb->width + (224 - width);
+    else
+       width = fb->width;
+
+    BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */
+    OUT_RING  (chan, 0);
+    BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */
+    OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    offset += 1 << 17;
+    BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */
+    OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    BEGIN_RING(chan, RING_3D_(0x07e0), 2);
+    OUT_RING  (chan, size);
+    OUT_RING  (chan, size >> 16); /* NOTE(review): meaning of the second word is not documented here */
+    BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */
+    OUT_RING  (chan, 2);
+    BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */
+    OUT_RING  (chan, width);
+    OUT_RING  (chan, height);
+    OUT_RING  (chan, 1);
+    OUT_RING  (chan, 0);
+    BEGIN_RING(chan, RING_3D_(0x15fc), 2);
+    OUT_RING  (chan, 0); /* bits 0xffff */
+    OUT_RING  (chan, 0); /* bits 0xffff */
+    BEGIN_RING(chan, RING_3D_(0x1958), 1);
+    OUT_RING  (chan, 0); /* bits ~0 */
+}
+/*
+ * Emit the framebuffer setup for nvc0->framebuffer.
+ *
+ * Resets the NVC0_BUFCTX_FRAME resident set, writes RT_CONTROL and the screen
+ * scissor, then programs address, size, format and tile mode for every
+ * colour buffer and adds its miptree to the resident set.  The depth/stencil
+ * buffer is programmed and enabled when present, otherwise ZETA_ENABLE is
+ * written as 0.  Without NVC0_SCISSORS_CLIPPING the viewport extent is set to
+ * the framebuffer size as well.
+ */
+static void
+nvc0_validate_fb(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+    unsigned i;
+
+    nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME);
+
+    BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+    OUT_RING  (chan, (076543210 << 4) | fb->nr_cbufs); /* note: 076543210 is an octal literal */
+    BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2);
+    OUT_RING  (chan, fb->width << 16);
+    OUT_RING  (chan, fb->height << 16);
+
+    for (i = 0; i < fb->nr_cbufs; ++i) {
+        struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture);
+        struct nouveau_bo *bo = mt->base.bo;
+        unsigned offset = fb->cbufs[i]->offset;
+        
+        BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8);
+        OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+        OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+        OUT_RING  (chan, fb->cbufs[i]->width);
+        OUT_RING  (chan, fb->cbufs[i]->height);
+        OUT_RING  (chan, nvc0_format_table[fb->cbufs[i]->format].rt);
+        OUT_RING  (chan, mt->level[fb->cbufs[i]->level].tile_mode);
+        OUT_RING  (chan, 1);
+        OUT_RING  (chan, 0);
+
+        nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
+                                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    }
+
+    if (fb->zsbuf) {
+        struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture);
+        struct nouveau_bo *bo = mt->base.bo;
+        unsigned offset = fb->zsbuf->offset;
+        
+        BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+        OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+        OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+        OUT_RING  (chan, nvc0_format_table[fb->zsbuf->format].rt);
+        OUT_RING  (chan, mt->level[fb->zsbuf->level].tile_mode);
+        OUT_RING  (chan, 0);
+        BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+        OUT_RING  (chan, 1);
+        BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+        OUT_RING  (chan, fb->zsbuf->width);
+        OUT_RING  (chan, fb->zsbuf->height);
+        OUT_RING  (chan, (1 << 16) | 1);
+
+        nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
+                                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    } else {
+        BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+        OUT_RING  (chan, 0);
+    }
+
+#ifndef NVC0_SCISSORS_CLIPPING
+    BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+    OUT_RING  (chan, fb->width << 16);
+    OUT_RING  (chan, fb->height << 16);
+#endif
+}
+/*
+ * Emit the four float components of nvc0->blend_colour.
+ */
+static void
+nvc0_validate_blend_colour(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+    
BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4);
+    OUT_RINGf (chan, nvc0->blend_colour.color[0]);
+    OUT_RINGf (chan, nvc0->blend_colour.color[1]);
+    OUT_RINGf (chan, nvc0->blend_colour.color[2]);
+    OUT_RINGf (chan, nvc0->blend_colour.color[3]);
+}
+/*
+ * Emit the front and back stencil reference values from nvc0->stencil_ref.
+ */
+static void
+nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+    BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1);
+    OUT_RING  (chan, nvc0->stencil_ref.ref_value[0]);
+    BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1);
+    OUT_RING  (chan, nvc0->stencil_ref.ref_value[1]);
+}
+/*
+ * Emit the 32 words of the polygon stipple pattern, each passed through
+ * util_bswap32() first.
+ */
+static void
+nvc0_validate_stipple(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    unsigned i;
+
+    BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+    for (i = 0; i < 32; ++i)
+        OUT_RING(chan, util_bswap32(nvc0->stipple.stipple[i]));
+}
+/*
+ * Emit the scissor rectangle.
+ *
+ * With NVC0_SCISSORS_CLIPPING defined: nothing is emitted unless scissor,
+ * viewport or framebuffer state is dirty or the rasterizer's scissor enable
+ * differs from the cached nvc0->state.scissor.  The rectangle starts as the
+ * scissor box (scissor enabled) or the whole framebuffer (disabled), is
+ * intersected with translate +/- |scale| of the viewport in x and y, and is
+ * written to both SCISSOR_HORIZ/VERT and VIEWPORT_HORIZ/VERT.
+ * NOTE(review): if the scissor box and the viewport do not overlap, max ends
+ * up below min and (max - min) is negative when packed - confirm this cannot
+ * happen or is harmless.
+ *
+ * Without it: the scissor box is emitted as is.
+ */
+static void
+nvc0_validate_scissor(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    struct pipe_scissor_state *s = &nvc0->scissor;
+#ifdef NVC0_SCISSORS_CLIPPING
+    struct pipe_viewport_state *vp = &nvc0->viewport;
+    int minx, maxx, miny, maxy;
+
+    if (!(nvc0->dirty &
+          (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) &&
+        nvc0->state.scissor == nvc0->rast->pipe.scissor)
+       return;
+    nvc0->state.scissor = nvc0->rast->pipe.scissor;
+
+    if (nvc0->state.scissor) {
+       minx = s->minx;
+       maxx = s->maxx;
+       miny = s->miny;
+       maxy = s->maxy;
+    } else {
+       minx = 0;
+       maxx = nvc0->framebuffer.width;
+       miny = 0;
+       maxy = nvc0->framebuffer.height;
+    }
+
+    minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
+    maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
+    miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
+    maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
+
+    BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+    OUT_RING  (chan, (maxx << 16) | minx);
+    OUT_RING  (chan, (maxy << 16) | miny);
+    BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+    OUT_RING  (chan, ((maxx - minx) << 16) | minx);
+    OUT_RING  (chan, ((maxy - miny) << 16) | miny);
+#else
+    BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+    OUT_RING  (chan, (s->maxx << 16) | s->minx);
+    OUT_RING  (chan, (s->maxy << 16) | s->miny);
+#endif
+}
+/*
+ * Emit the viewport translate and scale vectors; with
+ * NVC0_SCISSORS_CLIPPING also the depth range as translate[2] -/+ scale[2].
+ */
+static void
+nvc0_validate_viewport(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+    BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+    OUT_RINGf (chan, nvc0->viewport.translate[0]);
+    OUT_RINGf (chan, nvc0->viewport.translate[1]);
+    OUT_RINGf (chan, nvc0->viewport.translate[2]);
+    BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
+    OUT_RINGf (chan, nvc0->viewport.scale[0]);
+    OUT_RINGf (chan, nvc0->viewport.scale[1]);
+    OUT_RINGf (chan, nvc0->viewport.scale[2]);
+
+#ifdef NVC0_SCISSORS_CLIPPING
+    BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+    OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]);
+    OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]);
+#endif
+}
+/*
+ * Emit the view volume clip control word and, when user clip planes are set,
+ * upload them and enable one clip distance per plane; with no planes the
+ * clip distance enable is written as 0.
+ */
+static void
+nvc0_validate_clip(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   uint32_t clip;
+
+   clip = nvc0->clip.depth_clamp ? 
0x201a : 0x0002;
+#ifndef NVC0_SCISSORS_CLIPPING
+   clip |= 0x1080;
+#endif
+
+   BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+   OUT_RING  (chan, clip);
+
+   if (nvc0->clip.nr) {
+      struct nouveau_bo *bo = nvc0->screen->uniforms;
+
+      BEGIN_RING(chan, RING_3D(CB_SIZE), 3); /* planes go to offset 5 << 16 of the screen's uniforms bo */
+      OUT_RING  (chan, 256);
+      OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+      OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+      BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1);
+      OUT_RING  (chan, 0);
+      OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4);
+
+      BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1);
+      OUT_RING  (chan, (1 << nvc0->clip.nr) - 1);
+   } else {
+      INLIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0);
+   }
+}
+/*
+ * Copy the pre-built command words of the bound blend state object to the
+ * channel, after waiting for that many free words.
+ */
+static void
+nvc0_validate_blend(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+   WAIT_RING(chan, nvc0->blend->size);
+   OUT_RINGp(chan, nvc0->blend->state, nvc0->blend->size);
+}
+/*
+ * Same as nvc0_validate_blend(), for the depth-stencil-alpha state object.
+ */
+static void
+nvc0_validate_zsa(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+   WAIT_RING(chan, nvc0->zsa->size);
+   OUT_RINGp(chan, nvc0->zsa->state, nvc0->zsa->size);
+}
+/*
+ * Same as nvc0_validate_blend(), for the rasterizer state object.
+ */
+static void
+nvc0_validate_rasterizer(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+   WAIT_RING(chan, nvc0->rast->size);
+   OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size);
+}
+/*
+ * Process every dirty constant buffer slot of the five shader stage indices.
+ *
+ * Dirty slots are taken lowest bit first and their bit is cleared.  An empty
+ * slot is unbound via CB_BIND.  For a buffer that is not mapped by the GPU,
+ * slot 0 is placed in the screen's uniforms bo at offset s << 16 (and the
+ * bind is skipped when the region already bound there is large enough), any
+ * other slot uses the resource's own bo; in both cases the contents are then
+ * uploaded through CB_POS.  A GPU-mapped buffer is bound directly.  Buffers
+ * other than the uniforms bo are added to the NVC0_BUFCTX_CONSTANT resident
+ * set.
+ */
+static void
+nvc0_constbufs_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nouveau_bo *bo;
+   unsigned s;
+
+   for (s = 0; s < 5; ++s) {
+      struct nvc0_resource *res;
+      int i;
+
+      while (nvc0->constbuf_dirty[s]) {
+         unsigned base = 0;
+         unsigned offset = 0, words = 0;
+         boolean rebind = TRUE;
+
+         i = ffs(nvc0->constbuf_dirty[s]) - 1;
+         nvc0->constbuf_dirty[s] &= ~(1 << i);
+
+         res = nvc0_resource(nvc0->constbuf[s][i]);
+         if (!res) {
+            BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
+            OUT_RING  (chan, (i << 4) | 0);
+            if (i == 0)
+               nvc0->state.uniform_buffer_bound[s] = 0;
+            continue;
+         }
+
+         if (!nvc0_resource_mapped_by_gpu(&res->base)) {
+            if (i == 0) {
+               base = s << 16;
+               bo = nvc0->screen->uniforms;
+
+               if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0)
+                  rebind = FALSE;
+               else
+                  nvc0->state.uniform_buffer_bound[s] =
+                     align(res->base.width0, 0x100);
+            } else {
+               bo = res->bo;
+            }
+#if 0
+            nvc0_m2mf_push_linear(nvc0, bo, NOUVEAU_BO_VRAM,
+                                  base, res->base.width0, res->data);
+            BEGIN_RING(chan, RING_3D_(0x021c), 1);
+            OUT_RING  (chan, 0x1111);
+#else
+            words = res->base.width0 / 4; /* number of 32-bit words left to upload */
+#endif
+         } else {
+            bo = res->bo;
+            if (i == 0)
+               nvc0->state.uniform_buffer_bound[s] = 0;
+         }
+
+         if (bo != nvc0->screen->uniforms)
+            nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res,
+                                     NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+         if (rebind) {
+            BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+            OUT_RING  (chan, align(res->base.width0, 0x100));
+            OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+            OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+            BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
+            OUT_RING  (chan, (i << 4) | 1);
+         }
+
+         while (words) { /* upload in chunks limited by free ring space and packet length */
+            unsigned nr = AVAIL_RING(chan);
+
+            if (nr < 16) {
+               FIRE_RING(chan);
+               continue;
+            }
+            nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1);
+
+            BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+            OUT_RING  (chan, align(res->base.width0, 0x100));
+            OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+            OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+            BEGIN_RING_1I(chan, RING_3D(CB_POS), nr + 1);
+            OUT_RING  (chan, offset);
+            OUT_RINGp (chan, &res->data[offset], nr);
+
+            offset += nr * 4; /* offset advances by nr * 4 while words drops by nr */
+            words -= nr;
+         }
+      }
+   }
+}
+/*
+ * Validation table: each entry pairs a function with the NVC0_NEW_* bits
+ * that make it run.  nvc0_state_validate() walks the entries in this order.
+ */
+static struct state_validate {
+    void (*func)(struct nvc0_context *);
+    uint32_t states;
+} validate_list[] = {
+    { nvc0_validate_fb,            NVC0_NEW_FRAMEBUFFER },
+    { nvc0_validate_blend,         NVC0_NEW_BLEND },
+    { nvc0_validate_zsa,           NVC0_NEW_ZSA },
+    { nvc0_validate_rasterizer,    NVC0_NEW_RASTERIZER },
+    { nvc0_validate_blend_colour,  NVC0_NEW_BLEND_COLOUR },
+    { nvc0_validate_stencil_ref,   NVC0_NEW_STENCIL_REF },
+    { nvc0_validate_stipple,       NVC0_NEW_STIPPLE },
+#ifdef NVC0_SCISSORS_CLIPPING
+    { nvc0_validate_scissor,       NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT |
+                                   NVC0_NEW_RASTERIZER |
+                                   NVC0_NEW_FRAMEBUFFER },
+#else
+    { nvc0_validate_scissor,       NVC0_NEW_SCISSOR },
+#endif
+    { nvc0_validate_viewport,      NVC0_NEW_VIEWPORT },
+    { nvc0_validate_clip,          NVC0_NEW_CLIP },
+    { nvc0_vertprog_validate,      NVC0_NEW_VERTPROG },
+    { nvc0_tctlprog_validate,      NVC0_NEW_TCTLPROG },
+    { nvc0_tevlprog_validate,      NVC0_NEW_TEVLPROG },
+    { nvc0_gmtyprog_validate,      NVC0_NEW_GMTYPROG },
+    { nvc0_fragprog_validate,      NVC0_NEW_FRAGPROG },
+    { nvc0_constbufs_validate,     NVC0_NEW_CONSTBUF },
+    { nvc0_validate_textures,      NVC0_NEW_TEXTURES },
+    { nvc0_validate_samplers,      NVC0_NEW_SAMPLERS },
+    { 
nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+/*
+ * Bring the hardware state in line with the context.
+ *
+ * Makes nvc0 the screen's current context, runs every validate_list entry
+ * whose trigger bits intersect nvc0->dirty, clears nvc0->dirty, and emits the
+ * buffer context relocations.  As written it always returns TRUE.
+ */
+boolean
+nvc0_state_validate(struct nvc0_context *nvc0)
+{
+   unsigned i;
+#if 0
+   if (nvc0->screen->cur_ctx != nvc0) /* FIXME: not everything is valid */
+      nvc0->dirty = 0xffffffff;
+#endif
+   nvc0->screen->cur_ctx = nvc0;
+
+   if (nvc0->dirty) {
+      for (i = 0; i < validate_list_len; ++i) {
+         struct state_validate *validate = &validate_list[i];
+
+         if (nvc0->dirty & validate->states)
+            validate->func(nvc0);
+      }
+      nvc0->dirty = 0;
+   }
+
+   nvc0_bufctx_emit_relocs(nvc0);
+
+   return TRUE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
new file mode 100644
index 0000000000..67674d4093
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
@@ -0,0 +1,81 @@
+
+#ifndef __NVC0_STATEOBJ_H__
+#define __NVC0_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+/* Selects the NVC0_SCISSORS_CLIPPING code paths in nvc0_state_validate.c. */
+#define NVC0_SCISSORS_CLIPPING
+/*
+ * State object builders.  Each macro stores one 32-bit word at
+ * (so)->state[(so)->size] and post-increments (so)->size; `so` is evaluated
+ * more than once.  SB_BEGIN_3D and SB_OUT_3D combine a 3D method name `m`
+ * with a count `s` or an immediate `d`; SB_DATA stores `u` unchanged.
+ */
+#define SB_BEGIN_3D(so, m, s)                                                  \
+   (so)->state[(so)->size++] =                                                 \
+      (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
+
+#define SB_OUT_3D(so, m, d)                                                    \
+   (so)->state[(so)->size++] =                                                 \
+      (0x8 << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
+
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+/* Blend CSO: the Gallium state plus `size` pre-built command words. */
+struct nvc0_blend_stateobj {
+   struct pipe_blend_state pipe;
+   int size;
+   uint32_t state[72];
+};
+/* Sampler CSO: an id and eight 32-bit words. */
+struct nvc0_tsc_entry {
+   int id;
+   uint32_t tsc[8];
+};
+/* Cast an opaque sampler CSO pointer to struct nvc0_tsc_entry. */
+static INLINE struct nvc0_tsc_entry *
+nvc0_tsc_entry(void *hwcso)
+{
+   return (struct nvc0_tsc_entry *)hwcso;
+}
+/* Sampler view: the Gallium view (first member), an id and eight words. */
+struct nvc0_tic_entry {
+   struct pipe_sampler_view pipe;
+   int id;
+   uint32_t tic[8];
+};
+
+static 
INLINE struct nvc0_tic_entry *
+nvc0_tic_entry(struct pipe_sampler_view *view)
+{
+   return (struct nvc0_tic_entry *)view;
+}
+/* Rasterizer CSO: the Gallium state plus `size` pre-built command words. */
+struct nvc0_rasterizer_stateobj {
+   struct pipe_rasterizer_state pipe;
+   int size;
+   uint32_t state[43];
+};
+/* Depth-stencil-alpha CSO: the Gallium state plus `size` command words. */
+struct nvc0_zsa_stateobj {
+   struct pipe_depth_stencil_alpha_state pipe;
+   int size;
+   uint32_t state[29];
+};
+/* One vertex element: the Gallium description plus one hardware word. */
+struct nvc0_vertex_element {
+   struct pipe_vertex_element pipe;
+   uint32_t state;
+};
+/*
+ * Vertex elements CSO.
+ * NOTE(review): element[1] looks like a trailing variable-length array sized
+ * by num_elements - confirm against the code that allocates it.
+ */
+struct nvc0_vertex_stateobj {
+   struct translate *translate;
+   unsigned num_elements;
+   uint32_t instance_bits;
+   unsigned vtx_size;
+   unsigned vtx_per_packet_max;
+   struct nvc0_vertex_element element[1];
+};
+
+/* will have to lookup index -> location qualifier from nvc0_program */
+struct nvc0_tfb_state {
+   uint8_t varying_count[4];
+   uint32_t stride[4];
+   uint8_t varying_indices[1];
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
new file mode 100644
index 0000000000..b52b09877c
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -0,0 +1,361 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nv50_defs.xml.h"
+
+/* Return TRUE for formats that can be converted among each other by NVC0_2D.
+ * nvc0_surface_copy() asserts that source and destination either share a
+ * format or are both accepted here.
+ */
+static INLINE boolean
+nvc0_2d_format_faithful(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_SRGB:
+   case PIPE_FORMAT_B8G8R8X8_SRGB:
+   case PIPE_FORMAT_B5G6R5_UNORM:
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
+   case PIPE_FORMAT_B10G10R10A2_UNORM:
+   case PIPE_FORMAT_R8_UNORM:
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
+/*
+ * Choose the 2D engine format id for a pipe format.
+ *
+ * The render target id from nvc0_format_table is returned when it is at
+ * least 0xc0 and its bit is set in the support mask below.  Otherwise a
+ * substitute is picked from the format's block size (1, 2 or 4), and 0 is
+ * returned for any other block size; nvc0_surface_set() treats 0 as an
+ * unsupported format.
+ */
+static INLINE uint8_t
+nvc0_2d_format(enum pipe_format format)
+{
+   uint8_t id = nvc0_format_table[format].rt;
+
+   /* Hardware values for color formats range from 0xc0 to 0xff,
+    * but the 2D engine doesn't support all of them. 
+    */
+   if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0))))
+      return id;
+
+   switch (util_format_get_blocksize(format)) {
+   case 1:
+      return NV50_SURFACE_FORMAT_R8_UNORM;
+   case 2:
+      return NV50_SURFACE_FORMAT_R16_UNORM;
+   case 4:
+      return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
+   default:
+      return 0;
+   }
+}
+/*
+ * Program the 2D engine's source (dst == 0) or destination (dst != 0)
+ * surface from ps.
+ *
+ * The method base and the buffer access flag (read or write) follow dst.
+ * When bo->tile_flags is zero the level's pitch is programmed; otherwise the
+ * level's tile_mode is.  Width, height and the relocated address follow in
+ * both cases.
+ *
+ * Returns 0 on success, 1 (after logging) if nvc0_2d_format() yields no
+ * usable format.
+ */
+static int
+nvc0_surface_set(struct nvc0_screen *screen, struct pipe_surface *ps, int dst)
+{
+   struct nvc0_miptree *mt = nvc0_miptree(ps->texture);
+   struct nouveau_channel *chan = screen->base.channel;
+   struct nouveau_bo *bo = nvc0_miptree(ps->texture)->base.bo;
+   int format, mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
+   int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
+
+   format = nvc0_2d_format(ps->format);
+   if (!format) {
+      NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+                  util_format_name(ps->format));
+      return 1;
+   }
+
+   if (!bo->tile_flags) {
+      BEGIN_RING(chan, RING_2D_(mthd), 2);
+      OUT_RING  (chan, format);
+      OUT_RING  (chan, 1);
+      BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5);
+      OUT_RING  (chan, mt->level[ps->level].pitch);
+      OUT_RING  (chan, ps->width);
+      OUT_RING  (chan, ps->height);
+      OUT_RELOCh(chan, bo, ps->offset, flags);
+      OUT_RELOCl(chan, bo, ps->offset, flags);
+   } else {
+      BEGIN_RING(chan, RING_2D_(mthd), 5);
+      OUT_RING  (chan, format);
+      OUT_RING  (chan, 0);
+      OUT_RING  (chan, mt->level[ps->level].tile_mode);
+      OUT_RING  (chan, 1);
+      OUT_RING  (chan, 0);
+      BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4);
+      OUT_RING  (chan, ps->width);
+      OUT_RING  (chan, ps->height);
+      OUT_RELOCh(chan, bo, ps->offset, flags);
+      OUT_RELOCl(chan, bo, ps->offset, flags);
+   }
+ 
+#if 0 /* disabled; note it refers to `surf`, which this function does not declare */
+   if (dst) {
+      BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4);
+      OUT_RING  (chan, 0);
+      OUT_RING  (chan, 0);
+      OUT_RING  
(chan, surf->width);
+      OUT_RING  (chan, surf->height);
+   }
+#endif
+   return 0;
+}
+/*
+ * Copy a w x h rectangle from (sx, sy) in src to (dx, dy) in dst with the
+ * 2D engine.
+ *
+ * Ring space is marked first, then the destination and source surfaces are
+ * programmed and the blit is emitted with both scale factors written as
+ * (fract 0, int 1).
+ *
+ * Returns 0 on success, or the non-zero result of MARK_RING() or
+ * nvc0_surface_set() on failure.
+ */
+static int
+nvc0_surface_do_copy(struct nvc0_screen *screen,
+                     struct pipe_surface *dst, int dx, int dy,
+                     struct pipe_surface *src, int sx, int sy,
+                     int w, int h)
+{
+   struct nouveau_channel *chan = screen->base.channel;
+   int ret;
+
+   ret = MARK_RING(chan, 2*16 + 32, 4);
+   if (ret)
+      return ret;
+
+   ret = nvc0_surface_set(screen, dst, 1);
+   if (ret)
+      return ret;
+
+   ret = nvc0_surface_set(screen, src, 0);
+   if (ret)
+      return ret;
+
+   /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */
+   BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4);
+   OUT_RING  (chan, dx);
+   OUT_RING  (chan, dy);
+   OUT_RING  (chan, w);
+   OUT_RING  (chan, h);
+   BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, 1);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, sx);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, sy);
+
+   return 0;
+}
+/*
+ * resource_copy_region hook: wrap the addressed source and destination
+ * subresources in temporary surfaces, copy the rectangle with
+ * nvc0_surface_do_copy() and delete the surfaces again.
+ * NOTE(review): the results of nvc0_miptree_surface_new() and
+ * nvc0_surface_do_copy() are not checked.
+ */
+static void
+nvc0_surface_copy(struct pipe_context *pipe,
+		  struct pipe_resource *dest, struct pipe_subresource subdst,
+		  unsigned destx, unsigned desty, unsigned destz,
+		  struct pipe_resource *src, struct pipe_subresource subsrc,
+		  unsigned srcx, unsigned srcy, unsigned srcz,
+		  unsigned width, unsigned height)
+{
+   struct nvc0_context *nv50 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nv50->screen;
+   struct pipe_surface *ps_dst, *ps_src;
+
+   assert((src->format == dest->format) ||
+          (nvc0_2d_format_faithful(src->format) &&
+           nvc0_2d_format_faithful(dest->format)));
+
+   ps_src = nvc0_miptree_surface_new(pipe->screen, src, subsrc.face,
+                                     subsrc.level, srcz, 0 /* bind flags 
*/);
+   ps_dst = nvc0_miptree_surface_new(pipe->screen, dest, subdst.face,
+                                     subdst.level, destz, 0 /* bind flags */);
+
+   nvc0_surface_do_copy(screen, ps_dst, destx, desty, ps_src, srcx,
+                        srcy, width, height);
+
+   nvc0_miptree_surface_del(ps_src);
+   nvc0_miptree_surface_del(ps_dst);
+}
+/*
+ * clear_render_target hook: clear a rectangle of one colour surface.
+ *
+ * Emits the clear colour, binds dst as render target 0, sets the viewport
+ * extent to the rectangle and issues CLEAR_BUFFERS with 0x3c.  Because this
+ * overwrites the bound render target setup, NVC0_NEW_FRAMEBUFFER is flagged
+ * so the real framebuffer is re-emitted at the next validation.
+ * NOTE(review): the clear colour is already emitted when MARK_RING() fails
+ * and the function returns early.
+ */
+static void
+nvc0_clear_render_target(struct pipe_context *pipe,
+			 struct pipe_surface *dst,
+			 const float *rgba,
+			 unsigned dstx, unsigned dsty,
+			 unsigned width, unsigned height)
+{
+	struct nvc0_context *nv50 = nvc0_context(pipe);
+	struct nvc0_screen *screen = nv50->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
+	struct nouveau_bo *bo = mt->base.bo;
+
+	BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+	OUT_RINGf (chan, rgba[0]);
+	OUT_RINGf (chan, rgba[1]);
+	OUT_RINGf (chan, rgba[2]);
+	OUT_RINGf (chan, rgba[3]);
+
+	if (MARK_RING(chan, 18, 2))
+		return;
+
+	BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+	OUT_RING  (chan, 1);
+	BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 8);
+	OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	OUT_RING  (chan, dst->width);
+	OUT_RING  (chan, dst->height);
+	OUT_RING  (chan, nvc0_format_table[dst->format].rt);
+	OUT_RING  (chan, mt->level[dst->level].tile_mode);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 0);
+
+	/* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
+
+	BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+	OUT_RING  (chan, (width << 16) | dstx);
+	OUT_RING  (chan, (height << 16) | dsty);
+
+	BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+	OUT_RING  (chan, 0x3c);
+
+	nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+/*
+ * clear_depth_stencil hook: clear a rectangle of one depth/stencil surface.
+ *
+ * Depth and/or stencil clear values are emitted according to clear_flags,
+ * dst is bound as the depth/stencil buffer, the viewport extent is set to
+ * the rectangle and CLEAR_BUFFERS is issued with the matching Z/S bits.
+ * NVC0_NEW_FRAMEBUFFER is flagged afterwards.
+ */
+static void
+nvc0_clear_depth_stencil(struct pipe_context *pipe,
+                         struct pipe_surface *dst,
+                         unsigned clear_flags,
+                   
      double depth, +                         unsigned stencil, +                         unsigned dstx, unsigned dsty, +                         unsigned width, unsigned height) +{ +	struct nvc0_context *nv50 = nvc0_context(pipe); +	struct nvc0_screen *screen = nv50->screen; +	struct nouveau_channel *chan = screen->base.channel; +	struct nvc0_miptree *mt = nvc0_miptree(dst->texture); +	struct nouveau_bo *bo = mt->base.bo; +	uint32_t mode = 0; + +	if (clear_flags & PIPE_CLEAR_DEPTH) { +		BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); +		OUT_RINGf (chan, depth); +		mode |= NVC0_3D_CLEAR_BUFFERS_Z; +	} + +	if (clear_flags & PIPE_CLEAR_STENCIL) { +		BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); +		OUT_RING  (chan, stencil & 0xff); +		mode |= NVC0_3D_CLEAR_BUFFERS_S; +	} + +	if (MARK_RING(chan, 17, 2)) +		return; + +	BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); +	OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	OUT_RING  (chan, nvc0_format_table[dst->format].rt); +	OUT_RING  (chan, mt->level[dst->level].tile_mode); +	OUT_RING  (chan, 0); +	BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); +	OUT_RING  (chan, 1); +	BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); +	OUT_RING  (chan, dst->width); +	OUT_RING  (chan, dst->height); +	OUT_RING  (chan, (1 << 16) | 1); + +	BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); +	OUT_RING  (chan, (width << 16) | dstx); +	OUT_RING  (chan, (height << 16) | dsty); + +	BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); +	OUT_RING  (chan, mode); + +	nv50->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +void +nvc0_clear(struct pipe_context *pipe, unsigned buffers, +           const float *rgba, double depth, unsigned stencil) +{ +   struct nvc0_context *nvc0 = nvc0_context(pipe); +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   struct pipe_framebuffer_state *fb = &nvc0->framebuffer; +   unsigned i; +   const unsigned dirty = nvc0->dirty; +   uint32_t mode = 0; + +   
/* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ +   nvc0->dirty &= NVC0_NEW_FRAMEBUFFER; +   if (!nvc0_state_validate(nvc0)) +      return; + +   if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { +      BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); +      OUT_RINGf (chan, rgba[0]); +      OUT_RINGf (chan, rgba[1]); +      OUT_RINGf (chan, rgba[2]); +      OUT_RINGf (chan, rgba[3]); +      mode = +         NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G | +         NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A; +   } + +   if (buffers & PIPE_CLEAR_DEPTH) { +      BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); +      OUT_RING  (chan, fui(depth)); +      mode |= NVC0_3D_CLEAR_BUFFERS_Z; +   } + +   if (buffers & PIPE_CLEAR_STENCIL) { +      BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); +      OUT_RING  (chan, stencil & 0xff); +      mode |= NVC0_3D_CLEAR_BUFFERS_S; +   } + +   BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); +   OUT_RING  (chan, mode); + +   for (i = 1; i < fb->nr_cbufs; i++) { +      BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); +      OUT_RING  (chan, (i << 6) | 0x3c); +   } + +   nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER; +} + +void +nvc0_init_surface_functions(struct nvc0_context *nvc0) +{ +	nvc0->pipe.resource_copy_region = nvc0_surface_copy; +	nvc0->pipe.clear_render_target = nvc0_clear_render_target; +	nvc0->pipe.clear_depth_stencil = nvc0_clear_depth_stencil; +} + + diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c new file mode 100644 index 0000000000..5f28b83282 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -0,0 +1,266 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or 
sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_context.h" +#include "nvc0_resource.h" +#include "nv50_texture.xml.h" + +#include "util/u_format.h" + +static INLINE uint32_t +nv50_tic_swizzle(uint32_t tc, unsigned swz) +{ +   switch (swz) { +   case PIPE_SWIZZLE_RED: +      return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; +   case PIPE_SWIZZLE_GREEN: +      return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; +   case PIPE_SWIZZLE_BLUE: +      return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; +   case PIPE_SWIZZLE_ALPHA: +      return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; +   case PIPE_SWIZZLE_ONE: +      return NV50_TIC_MAP_ONE; +   case PIPE_SWIZZLE_ZERO: +   default: +      return NV50_TIC_MAP_ZERO; +   } +} + +struct pipe_sampler_view * +nvc0_create_sampler_view(struct pipe_context *pipe, +                         struct pipe_resource *texture, +                         const struct pipe_sampler_view *templ) +{ +   const struct util_format_description *desc; +   uint32_t *tic; +   uint32_t swz[4]; +   struct nvc0_tic_entry *view; +   struct nvc0_miptree *mt = nvc0_miptree(texture); + +   view = MALLOC_STRUCT(nvc0_tic_entry); +   if (!view) +      return NULL; + +   view->pipe = *templ; +   
view->pipe.reference.count = 1; +   view->pipe.texture = NULL; +   view->pipe.context = pipe; + +   view->id = -1; + +   pipe_resource_reference(&view->pipe.texture, texture); + +   tic = &view->tic[0]; + +   desc = util_format_description(mt->base.base.format); + +   /* TIC[0] */ + +   tic[0] = nvc0_format_table[view->pipe.format].tic; + +   swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r); +   swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g); +   swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b); +   swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a); +   tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | +      (swz[0] << NV50_TIC_0_MAPR__SHIFT) | +      (swz[1] << NV50_TIC_0_MAPG__SHIFT) | +      (swz[2] << NV50_TIC_0_MAPB__SHIFT) | +      (swz[3] << NV50_TIC_0_MAPA__SHIFT); + +   /* tic[1] = mt->base.bo->offset; */ +   tic[2] = /* mt->base.bo->offset >> 32 */ 0; + +   tic[2] |= 0x10001000 | /* NV50_TIC_2_NO_BORDER */ 0x40000000; + +   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) +      tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + +   if (mt->base.base.target != PIPE_TEXTURE_RECT) +      tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + +   tic[2] |= +      ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) | +      ((mt->base.bo->tile_mode & 0xf00) << (21 - 4)); + +   switch (mt->base.base.target) { +   case PIPE_TEXTURE_1D: +      tic[2] |= NV50_TIC_2_TARGET_1D; +      break; +   case PIPE_TEXTURE_2D: +      tic[2] |= NV50_TIC_2_TARGET_2D; +      break; +   case PIPE_TEXTURE_RECT: +      tic[2] |= NV50_TIC_2_TARGET_RECT; +      break; +   case PIPE_TEXTURE_3D: +      tic[2] |= NV50_TIC_2_TARGET_3D; +      break; +   case PIPE_TEXTURE_CUBE: +      tic[2] |= NV50_TIC_2_TARGET_CUBE; +      break; +   case PIPE_BUFFER: +      tic[2] |= NV50_TIC_2_TARGET_BUFFER | /* NV50_TIC_2_LINEAR */ (1 << 18); +   default: +      NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target); +      return FALSE; +   } + +   if (mt->base.base.target == 
PIPE_BUFFER) +      tic[3] = mt->base.base.width0; +   else +      tic[3] = 0x00300000; + +   tic[4] = (1 << 31) | mt->base.base.width0; + +   tic[5] = mt->base.base.height0 & 0xffff; +   tic[5] |= mt->base.base.depth0 << 16; +   tic[5] |= mt->base.base.last_level << 28; + +   tic[6] = 0x03000000; + +   tic[7] = (view->pipe.last_level << 4) | view->pipe.first_level; + +   return &view->pipe; +} + +static boolean +nvc0_validate_tic(struct nvc0_context *nvc0, int s) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   struct nouveau_bo *txc = nvc0->screen->txc; +   unsigned i; +   boolean need_flush = FALSE; + +   for (i = 0; i < nvc0->num_textures[s]; ++i) { +      struct nvc0_tic_entry *tic = nvc0_tic_entry(nvc0->textures[s][i]); +      struct nvc0_resource *res; + +      if (!tic) { +         BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); +         OUT_RING  (chan, (i << 1) | 0); +         continue; +      } +      res = &nvc0_miptree(tic->pipe.texture)->base; + +      if (tic->id < 0) { +         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + +         MARK_RING (chan, 9 + 8, 4); +         BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); +         OUT_RELOCh(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +         OUT_RELOCl(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +         BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); +         OUT_RING  (chan, 32); +         OUT_RING  (chan, 1); +         BEGIN_RING(chan, RING_MF(EXEC), 1); +         OUT_RING  (chan, 0x100111); +         BEGIN_RING(chan, RING_MF(DATA), 8); +         OUT_RING  (chan, tic->tic[0]); +         OUT_RELOCl(chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +         OUT_RELOC (chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | +                    NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]); +         OUT_RINGp (chan, &tic->tic[3], 5); + +         need_flush = TRUE; +      } +      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id 
% 32); + +      nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res, +                               NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + +      BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); +      OUT_RING  (chan, (tic->id << 9) | (i << 1) | 1); +   } +   for (; i < nvc0->state.num_textures[s]; ++i) { +      BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); +      OUT_RING  (chan, (i << 1) | 0); +   } +   nvc0->state.num_textures[s] = nvc0->num_textures[s]; + +   return need_flush; +} + +void nvc0_validate_textures(struct nvc0_context *nvc0) +{ +   boolean need_flush; + +   nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES); + +   need_flush  = nvc0_validate_tic(nvc0, 0); +   need_flush |= nvc0_validate_tic(nvc0, 4); + +   if (need_flush) { +      BEGIN_RING(nvc0->screen->base.channel, RING_3D(TIC_FLUSH), 1); +      OUT_RING  (nvc0->screen->base.channel, 0); +   } +} + +static boolean +nvc0_validate_tsc(struct nvc0_context *nvc0, int s) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   unsigned i; +   boolean need_flush = FALSE; + +   for (i = 0; i < nvc0->num_samplers[s]; ++i) { +      struct nvc0_tsc_entry *tsc = nvc0_tsc_entry(nvc0->samplers[s][i]); + +      if (!tsc) { +         BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); +         OUT_RING  (chan, (i << 4) | 0); +         continue; +      } +      if (tsc->id < 0) { +         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); + +         nvc0_m2mf_push_linear(nvc0, nvc0->screen->txc, NOUVEAU_BO_VRAM, +                               65536 + tsc->id * 32, 32, tsc->tsc); +         need_flush = TRUE; +      } +      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + +      BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); +      OUT_RING  (chan, (tsc->id << 12) | (i << 4) | 1); +   } +   for (; i < nvc0->state.num_samplers[s]; ++i) { +      BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); +      OUT_RING  (chan, (i << 4) | 0); +   } +   nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + +   return 
need_flush; +} + +void nvc0_validate_samplers(struct nvc0_context *nvc0) +{ +   boolean need_flush; + +   need_flush  = nvc0_validate_tsc(nvc0, 0); +   need_flush |= nvc0_validate_tsc(nvc0, 4); + +   if (need_flush) { +      BEGIN_RING(nvc0->screen->base.channel, RING_3D(TSC_FLUSH), 1); +      OUT_RING  (nvc0->screen->base.channel, 0); +   } +} diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c new file mode 100644 index 0000000000..d0c8275489 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -0,0 +1,1938 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <unistd.h> + +#define NOUVEAU_DEBUG 1 + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" +#include "util/u_dynarray.h" + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +/* Arbitrary internal limits. 
*/ +#define BLD_MAX_TEMPS 64 +#define BLD_MAX_ADDRS 4 +#define BLD_MAX_PREDS 4 +#define BLD_MAX_IMMDS 128 +#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS + +#define BLD_MAX_COND_NESTING 8 +#define BLD_MAX_LOOP_NESTING 4 +#define BLD_MAX_CALL_NESTING 2 + +/* This structure represents a TGSI register. */ +struct bld_register { +   struct nv_value *current; +   /* collect all SSA values assigned to it */ +   struct util_dynarray vals; +   /* 1 bit per loop level, indicates if used/defd, reset when loop ends */ +   uint16_t loop_use; +   uint16_t loop_def; +}; + +static INLINE struct nv_value ** +bld_register_access(struct bld_register *reg, unsigned i) +{ +   return util_dynarray_element(®->vals, struct nv_value *, i); +} + +static INLINE void +bld_register_add_val(struct bld_register *reg, struct nv_value *val) +{ +   util_dynarray_append(®->vals, struct nv_value *, val); +} + +static INLINE boolean +bld_register_del_val(struct bld_register *reg, struct nv_value *val) +{ +   unsigned i; + +   for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i) +      if (*bld_register_access(reg, i - 1) == val) +         break; +   if (!i) +      return FALSE; + +   if (i != reg->vals.size / sizeof(struct nv_value *)) +      *bld_register_access(reg, i - 1) = util_dynarray_pop(®->vals, +                                                           struct nv_value *); +   else +      reg->vals.size -= sizeof(struct nv_value *); + +   return TRUE; +} + +struct bld_context { +   struct nvc0_translation_info *ti; + +   struct nv_pc *pc; +   struct nv_basic_block *b; + +   struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING]; +   int call_lvl; + +   struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING]; +   struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING]; +   struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING]; +   int cond_lvl; +   struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING]; +   struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING]; +   int loop_lvl; + +   ubyte 
out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */ + +   struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */ +   struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */ +   struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */ +   struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */ + +   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8]; +   int hpos_index; + +   struct nv_value *zero; +   struct nv_value *frag_coord[4]; + +   /* wipe on new BB */ +   struct nv_value *saved_sysvals[4]; +   struct nv_value *saved_addr[4][2]; +   struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4]; +   struct nv_value *saved_immd[BLD_MAX_IMMDS]; +   uint num_immds; +}; + +static INLINE ubyte +bld_register_file(struct bld_context *bld, struct bld_register *reg) +{ +   if (reg < &bld->avs[0][0]) return NV_FILE_GPR; +   else +   if (reg < &bld->pvs[0][0]) return NV_FILE_GPR; +   else +   if (reg < &bld->ovs[0][0]) return NV_FILE_PRED; +   else +      return NV_FILE_MEM_V; +} + +static INLINE struct nv_value * +bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c) +{ +   regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl; +   return regs[i * 4 + c].current; +} + +static struct nv_value * +bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *); + +/* If a variable is defined in a loop without prior use, we don't need + * a phi in the loop header to account for backwards flow. + * + * However, if this variable is then also used outside the loop, we do + * need a phi after all. But we must not use this phi's def inside the + * loop, so we can eliminate the phi if it is unused later. 
+ */ +static INLINE void +bld_store(struct bld_context *bld, +          struct bld_register *regs, int i, int c, struct nv_value *val) +{ +   const uint16_t m = 1 << bld->loop_lvl; +   struct bld_register *reg = ®s[i * 4 + c]; + +   if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use))) +      bld_loop_phi(bld, reg, val); + +   reg->current = val; +   bld_register_add_val(reg, reg->current); + +   reg->loop_def |= 1 << bld->loop_lvl; +} + +#define FETCH_TEMP(i, c)    bld_fetch(bld, &bld->tvs[0][0], i, c) +#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v)) +#define FETCH_ADDR(i, c)    bld_fetch(bld, &bld->avs[0][0], i, c) +#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v)) +#define FETCH_PRED(i, c)    bld_fetch(bld, &bld->pvs[0][0], i, c) +#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v)) +#define STORE_OUTP(i, c, v)                                         \ +   do {                                                             \ +      bld_store(bld, &bld->ovs[0][0], i, c, (v));                   \ +      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \ +   } while (0) + +static INLINE void +bld_clear_def_use(struct bld_register *regs, int n, int lvl) +{ +   int i; +   const uint16_t mask = ~(1 << lvl); + +   for (i = 0; i < n * 4; ++i) { +      regs[i].loop_def &= mask; +      regs[i].loop_use &= mask; +   } +} + +static INLINE void +bld_warn_uninitialized(struct bld_context *bld, int kind, +                       struct bld_register *reg, struct nv_basic_block *b) +{ +#ifdef NOUVEAU_DEBUG +   long i = (reg - &bld->tvs[0][0]) / 4; +   long c = (reg - &bld->tvs[0][0]) & 3; + +   if (c == 3) +      c = -1; +   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n", +                i, (int)('x' + c), kind ? 
"may be" : "is", b->id); +#endif +} + +static INLINE struct nv_value * +bld_def(struct nv_instruction *i, int c, struct nv_value *value) +{ +   i->def[c] = value; +   value->insn = i; +   return value; +} + +static INLINE struct nv_value * +find_by_bb(struct bld_register *reg, struct nv_basic_block *b) +{ +   int i; + +   if (reg->current && reg->current->insn->bb == b) +      return reg->current; + +   for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i) +      if ((*bld_register_access(reg, i))->insn->bb == b) +         return *bld_register_access(reg, i); +   return NULL; +} + +/* Fetch value from register that was defined in the specified BB, + * or search for first definitions in all of its predecessors. + */ +static void +fetch_by_bb(struct bld_register *reg, +            struct nv_value **vals, int *n, +            struct nv_basic_block *b) +{ +   int i; +   struct nv_value *val; + +   assert(*n < 16); /* MAX_COND_NESTING */ + +   val = find_by_bb(reg, b); +   if (val) { +      for (i = 0; i < *n; ++i) +         if (vals[i] == val) +            return; +      vals[(*n)++] = val; +      return; +   } +   for (i = 0; i < b->num_in; ++i) +      if (!IS_WALL_EDGE(b->in_kind[i])) +         fetch_by_bb(reg, vals, n, b->in[i]); +} + +static INLINE struct nv_value * +bld_load_imm_u32(struct bld_context *bld, uint32_t u); + +static INLINE struct nv_value * +bld_undef(struct bld_context *bld, ubyte file) +{ +   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF); + +   return bld_def(nvi, 0, new_value(bld->pc, file, 4)); +} + +static struct nv_value * +bld_phi(struct bld_context *bld, struct nv_basic_block *b, +        struct bld_register *reg) +{ +   struct nv_basic_block *in; +   struct nv_value *vals[16] = { NULL }; +   struct nv_value *val; +   struct nv_instruction *phi; +   int i, j, n; + +   do { +      i = n = 0; +      fetch_by_bb(reg, vals, &n, b); + +      if (!n) { +         bld_warn_uninitialized(bld, 0, reg, b); +         
return NULL; +      } + +      if (n == 1) { +         if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb)) +            break; + +         bld_warn_uninitialized(bld, 1, reg, b); + +         /* back-tracking to insert missing value of other path */ +         in = b; +         while (in->in[0]) { +            if (in->num_in == 1) { +               in = in->in[0]; +            } else { +               if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b)) +                  in = in->in[0]; +               else +               if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b)) +                  in = in->in[1]; +               else +                  in = in->in[0]; +            } +         } +         bld->pc->current_block = in; + +         /* should make this a no-op */ +         bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file)); +         continue; +      } + +      for (i = 0; i < n; ++i) { +         /* if value dominates b, continue to the redefinitions */ +         if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb)) +            continue; + +         /* if value dominates any in-block, b should be the dom frontier */ +         for (j = 0; j < b->num_in; ++j) +            if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb)) +               break; +         /* otherwise, find the dominance frontier and put the phi there */ +         if (j == b->num_in) { +            in = nvc0_bblock_dom_frontier(vals[i]->insn->bb); +            val = bld_phi(bld, in, reg); +            bld_register_add_val(reg, val); +            break; +         } +      } +   } while(i < n); + +   bld->pc->current_block = b; + +   if (n == 1) +      return vals[0]; + +   phi = new_instruction(bld->pc, NV_OP_PHI); + +   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size)); +   for (i = 0; i < n; ++i) +      nv_reference(bld->pc, phi, i, vals[i]); + +   return phi->def[0]; +} + +/* Insert a phi function in the loop header. 
/* Insert a phi function in the loop header.
 * For nested loops, we need to insert phi functions in all the outer
 * loop headers if they don't have one yet.
 *
 * @bld: builder context; current_block and loop_lvl are modified
 *       temporarily and restored before returning
 * @reg: the TGSI register slot the phi is created for
 * @def: redefinition from inside loop, or NULL if to be replaced later
 *
 * Returns the value defined by the new phi.
 */
static struct nv_value *
bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
             struct nv_value *def)
{
   struct nv_instruction *phi;
   struct nv_basic_block *bb = bld->pc->current_block; /* restored on exit */
   struct nv_value *val = NULL;

   /* Handle the enclosing loop first: step one level out and, if the
    * register was neither used nor defined there, create that loop's phi
    * recursively. Its result becomes our incoming value.
    */
   if (bld->loop_lvl > 1) {
      --bld->loop_lvl;
      if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
         val = bld_loop_phi(bld, reg, NULL);
      ++bld->loop_lvl;
   }

   if (!val)
      val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */
   if (!val) {
      /* No definition reaches here: emit an UNDEF in the first
       * predecessor of this loop's block (presumably the loop entry).
       */
      bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
      val = bld_undef(bld, bld_register_file(bld, reg));
   }

   /* the phi itself is created with the loop's block as current block */
   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value_like(bld->pc, val));
   if (!def)
      def = phi->def[0]; /* placeholder: phi references itself for now */

   bld_register_add_val(reg, phi->def[0]);

   /* stash the register in the otherwise unused target field;
    * bld_loop_end reads it back and resets the field to NULL
    */
   phi->target = (struct nv_basic_block *)reg; /* cheat */

   nv_reference(bld->pc, phi, 0, val); /* source 0: value before the loop */
   nv_reference(bld->pc, phi, 1, def); /* source 1: value from inside */

   bld->pc->current_block = bb;

   return phi->def[0];
}
+    */ +   if (bld->loop_lvl && !((use | reg->loop_def) & m)) +      return bld_loop_phi(bld, reg, NULL); + +   return bld_phi(bld, bld->pc->current_block, reg); +} + +static INLINE struct nv_value * +bld_imm_u32(struct bld_context *bld, uint32_t u) +{ +   int i; +   unsigned n = bld->num_immds; + +   for (i = 0; i < n; ++i) +      if (bld->saved_immd[i]->reg.imm.u32 == u) +         return bld->saved_immd[i]; + +   assert(n < BLD_MAX_IMMDS); +   bld->num_immds++; + +   bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4); +   bld->saved_immd[n]->reg.imm.u32 = u; +   return bld->saved_immd[n]; +} + +static void +bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *, +                  struct nv_value *); + +/* Replace the source of the phi in the loop header by the last assignment, + * or eliminate the phi function if there is no assignment inside the loop. + * + * Redundancy situation 1 - (used) but (not redefined) value: + *  %3 = phi %0, %3 = %3 is used + *  %3 = phi %0, %4 = is new definition + * + * Redundancy situation 2 - (not used) but (redefined) value: + *  %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE + */ +static void +bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) +{ +   struct nv_basic_block *save = bld->pc->current_block; +   struct nv_instruction *phi, *next; +   struct nv_value *val; +   struct bld_register *reg; +   int i, s, n; + +   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) { +      next = phi->next; + +      reg = (struct bld_register *)phi->target; +      phi->target = NULL; + +      for (s = 1, n = 0; n < bb->num_in; ++n) { +         if (bb->in_kind[n] != CFG_EDGE_BACK) +            continue; + +         assert(s < 4); +         bld->pc->current_block = bb->in[n]; +         val = bld_fetch_global(bld, reg); + +         for (i = 0; i < 4; ++i) +            if (phi->src[i] && phi->src[i]->value == val) +               break; +         if (i == 4) +       
     nv_reference(bld->pc, phi, s++, val); +      } +      bld->pc->current_block = save; + +      if (phi->src[0]->value == phi->def[0] || +          phi->src[0]->value == phi->src[1]->value) +         s = 1; +      else +      if (phi->src[1]->value == phi->def[0]) +         s = 0; +      else +         continue; + +      if (s >= 0) { +         /* eliminate the phi */ +         bld_register_del_val(reg, phi->def[0]); + +         ++bld->pc->pass_seq; +         bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value); + +         nvc0_insn_delete(phi); +      } +   } +} + +static INLINE struct nv_value * +bld_imm_f32(struct bld_context *bld, float f) +{ +   return bld_imm_u32(bld, fui(f)); +} + +static struct nv_value * +bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0) +{ +   struct nv_instruction *insn = new_instruction(bld->pc, opcode); + +   nv_reference(bld->pc, insn, 0, src0); +    +   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static struct nv_value * +bld_insn_2(struct bld_context *bld, uint opcode, +           struct nv_value *src0, struct nv_value *src1) +{ +   struct nv_instruction *insn = new_instruction(bld->pc, opcode); + +   nv_reference(bld->pc, insn, 0, src0); +   nv_reference(bld->pc, insn, 1, src1); + +   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static struct nv_value * +bld_insn_3(struct bld_context *bld, uint opcode, +           struct nv_value *src0, struct nv_value *src1, +           struct nv_value *src2) +{ +   struct nv_instruction *insn = new_instruction(bld->pc, opcode); + +   nv_reference(bld->pc, insn, 0, src0); +   nv_reference(bld->pc, insn, 1, src1); +   nv_reference(bld->pc, insn, 2, src2); + +   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static INLINE void +bld_src_predicate(struct bld_context *bld, +                  struct nv_instruction *nvi, int s, struct nv_value *val) +{ +   
nvi->predicate = s; +   nv_reference(bld->pc, nvi, s, val); +} + +static INLINE void +bld_src_pointer(struct bld_context *bld, +                struct nv_instruction *nvi, int s, struct nv_value *val) +{ +   nvi->indirect = s; +   nv_reference(bld->pc, nvi, s, val); +} + +static void +bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, +               struct nv_value *val) +{ +   struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST); +   struct nv_value *loc; + +   loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); + +   loc->reg.id = ofst * 4; + +   nv_reference(bld->pc, insn, 0, loc); +   nv_reference(bld->pc, insn, 1, ptr); +   nv_reference(bld->pc, insn, 2, val); +} + +static struct nv_value * +bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) +{ +   struct nv_value *loc, *val; + +   loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); + +   loc->reg.address = ofst * 4; + +   val = bld_insn_2(bld, NV_OP_LD, loc, ptr); + +   return val; +} + +static struct nv_value * +bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e) +{ +   struct nv_value *val; + +   val = bld_insn_1(bld, NV_OP_LG2, x); +   val = bld_insn_2(bld, NV_OP_MUL_F32, e, val); + +   val = bld_insn_1(bld, NV_OP_PREEX2, val); +   val = bld_insn_1(bld, NV_OP_EX2, val); + +   return val; +} + +static INLINE struct nv_value * +bld_load_imm_f32(struct bld_context *bld, float f) +{ +   if (f == 0.0f) +      return bld->zero; +   return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f)); +} + +static INLINE struct nv_value * +bld_load_imm_u32(struct bld_context *bld, uint32_t u) +{ +   if (u == 0) +      return bld->zero; +   return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u)); +} + +static INLINE struct nv_value * +bld_setp(struct bld_context *bld, uint op, uint8_t cc, +         struct nv_value *src0, struct nv_value *src1) +{ +   struct nv_value *val = bld_insn_2(bld, op, src0, src1); + +   
val->reg.file = NV_FILE_PRED; +   val->reg.size = 1; +   val->insn->set_cond = cc & 0xf; +   return val; +} + +static INLINE struct nv_value * +bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src) +{ +   struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src); +   val->insn->ext.cvt.d = dt; +   val->insn->ext.cvt.s = st; +   return val; +} + +static void +bld_kil(struct bld_context *bld, struct nv_value *src) +{ +   struct nv_instruction *nvi; + +   src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero); + +   nvi = new_instruction(bld->pc, NV_OP_KIL); +   nvi->fixed = 1; + +   bld_src_predicate(bld, nvi, 0, src); +} + +static void +bld_flow(struct bld_context *bld, uint opcode, +         struct nv_value *src, struct nv_basic_block *target, +         boolean reconverge) +{ +   struct nv_instruction *nvi; + +   if (reconverge) +      new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1; + +   nvi = new_instruction(bld->pc, opcode); +   nvi->target = target; +   nvi->terminator = 1; +   if (src) +      bld_src_predicate(bld, nvi, 0, src); +} + +static ubyte +translate_setcc(unsigned opcode) +{ +   switch (opcode) { +   case TGSI_OPCODE_SLT: return NV_CC_LT; +   case TGSI_OPCODE_SGE: return NV_CC_GE; +   case TGSI_OPCODE_SEQ: return NV_CC_EQ; +   case TGSI_OPCODE_SGT: return NV_CC_GT; +   case TGSI_OPCODE_SLE: return NV_CC_LE; +   case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U; +   case TGSI_OPCODE_STR: return NV_CC_TR; +   case TGSI_OPCODE_SFL: return NV_CC_FL; + +   case TGSI_OPCODE_ISLT: return NV_CC_LT; +   case TGSI_OPCODE_ISGE: return NV_CC_GE; +   case TGSI_OPCODE_USEQ: return NV_CC_EQ; +   case TGSI_OPCODE_USGE: return NV_CC_GE; +   case TGSI_OPCODE_USLT: return NV_CC_LT; +   case TGSI_OPCODE_USNE: return NV_CC_NE; +   default: +      assert(0); +      return NV_CC_FL; +   } +} + +static uint +translate_opcode(uint opcode) +{ +   switch (opcode) { +   case TGSI_OPCODE_ABS: return NV_OP_ABS_F32; +   case TGSI_OPCODE_ADD: return 
NV_OP_ADD_F32; +   case TGSI_OPCODE_SUB: return NV_OP_SUB_F32; +   case TGSI_OPCODE_UADD: return NV_OP_ADD_B32; +   case TGSI_OPCODE_AND: return NV_OP_AND; +   case TGSI_OPCODE_EX2: return NV_OP_EX2; +   case TGSI_OPCODE_CEIL: return NV_OP_CEIL; +   case TGSI_OPCODE_FLR: return NV_OP_FLOOR; +   case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC; +   case TGSI_OPCODE_COS: return NV_OP_COS; +   case TGSI_OPCODE_SIN: return NV_OP_SIN; +   case TGSI_OPCODE_DDX: return NV_OP_DFDX; +   case TGSI_OPCODE_DDY: return NV_OP_DFDY; +   case TGSI_OPCODE_F2I: +   case TGSI_OPCODE_F2U: +   case TGSI_OPCODE_I2F: +   case TGSI_OPCODE_U2F: return NV_OP_CVT; +   case TGSI_OPCODE_INEG: return NV_OP_NEG_S32; +   case TGSI_OPCODE_LG2: return NV_OP_LG2; +   case TGSI_OPCODE_ISHR: return NV_OP_SAR; +   case TGSI_OPCODE_USHR: return NV_OP_SHR; +   case TGSI_OPCODE_MAD: return NV_OP_MAD_F32; +   case TGSI_OPCODE_MAX: return NV_OP_MAX_F32; +   case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32; +   case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32; +   case TGSI_OPCODE_MIN: return NV_OP_MIN_F32; +   case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32; +   case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32; +   case TGSI_OPCODE_MUL: return NV_OP_MUL_F32; +   case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32; +   case TGSI_OPCODE_OR: return NV_OP_OR; +   case TGSI_OPCODE_RCP: return NV_OP_RCP; +   case TGSI_OPCODE_RSQ: return NV_OP_RSQ; +   case TGSI_OPCODE_SAD: return NV_OP_SAD; +   case TGSI_OPCODE_SHL: return NV_OP_SHL; +   case TGSI_OPCODE_SLT: +   case TGSI_OPCODE_SGE: +   case TGSI_OPCODE_SEQ: +   case TGSI_OPCODE_SGT: +   case TGSI_OPCODE_SLE: +   case TGSI_OPCODE_SNE: return NV_OP_FSET_F32; +   case TGSI_OPCODE_ISLT: +   case TGSI_OPCODE_ISGE: return NV_OP_SET_S32; +   case TGSI_OPCODE_USEQ: +   case TGSI_OPCODE_USGE: +   case TGSI_OPCODE_USLT: +   case TGSI_OPCODE_USNE: return NV_OP_SET_U32; +   case TGSI_OPCODE_TEX: return NV_OP_TEX; +   case TGSI_OPCODE_TXP: return NV_OP_TEX; +   case TGSI_OPCODE_TXB: return NV_OP_TXB; +   
case TGSI_OPCODE_TXL: return NV_OP_TXL; +   case TGSI_OPCODE_XOR: return NV_OP_XOR; +   default: +      return NV_OP_NOP; +   } +} + +#if 0 +static ubyte +infer_src_type(unsigned opcode) +{ +   switch (opcode) { +   case TGSI_OPCODE_MOV: +   case TGSI_OPCODE_AND: +   case TGSI_OPCODE_OR: +   case TGSI_OPCODE_XOR: +   case TGSI_OPCODE_SAD: +   case TGSI_OPCODE_U2F: +   case TGSI_OPCODE_UADD: +   case TGSI_OPCODE_UDIV: +   case TGSI_OPCODE_UMOD: +   case TGSI_OPCODE_UMAD: +   case TGSI_OPCODE_UMUL: +   case TGSI_OPCODE_UMAX: +   case TGSI_OPCODE_UMIN: +   case TGSI_OPCODE_USEQ: +   case TGSI_OPCODE_USGE: +   case TGSI_OPCODE_USLT: +   case TGSI_OPCODE_USNE: +   case TGSI_OPCODE_USHR: +      return NV_TYPE_U32; +   case TGSI_OPCODE_I2F: +   case TGSI_OPCODE_IDIV: +   case TGSI_OPCODE_IMAX: +   case TGSI_OPCODE_IMIN: +   case TGSI_OPCODE_INEG: +   case TGSI_OPCODE_ISGE: +   case TGSI_OPCODE_ISHR: +   case TGSI_OPCODE_ISLT: +      return NV_TYPE_S32; +   default: +      return NV_TYPE_F32; +   } +} + +static ubyte +infer_dst_type(unsigned opcode) +{ +   switch (opcode) { +   case TGSI_OPCODE_MOV: +   case TGSI_OPCODE_F2U: +   case TGSI_OPCODE_AND: +   case TGSI_OPCODE_OR: +   case TGSI_OPCODE_XOR: +   case TGSI_OPCODE_SAD: +   case TGSI_OPCODE_UADD: +   case TGSI_OPCODE_UDIV: +   case TGSI_OPCODE_UMOD: +   case TGSI_OPCODE_UMAD: +   case TGSI_OPCODE_UMUL: +   case TGSI_OPCODE_UMAX: +   case TGSI_OPCODE_UMIN: +   case TGSI_OPCODE_USEQ: +   case TGSI_OPCODE_USGE: +   case TGSI_OPCODE_USLT: +   case TGSI_OPCODE_USNE: +   case TGSI_OPCODE_USHR: +      return NV_TYPE_U32; +   case TGSI_OPCODE_F2I: +   case TGSI_OPCODE_IDIV: +   case TGSI_OPCODE_IMAX: +   case TGSI_OPCODE_IMIN: +   case TGSI_OPCODE_INEG: +   case TGSI_OPCODE_ISGE: +   case TGSI_OPCODE_ISHR: +   case TGSI_OPCODE_ISLT: +      return NV_TYPE_S32; +   default: +      return NV_TYPE_F32; +   } +} +#endif + +static void +emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, +           
unsigned chan, struct nv_value *res) +{ +   const struct tgsi_full_dst_register *reg = &inst->Dst[0]; +   struct nv_instruction *nvi; +   struct nv_value *mem; +   struct nv_value *ptr = NULL; +   int idx; + +   idx = reg->Register.Index; +   assert(chan < 4); + +   if (reg->Register.Indirect) +      ptr = FETCH_ADDR(reg->Indirect.Index, +                       tgsi_util_get_src_register_swizzle(®->Indirect, 0)); + +   switch (inst->Instruction.Saturate) { +   case TGSI_SAT_NONE: +      break; +   case TGSI_SAT_ZERO_ONE: +      res = bld_insn_1(bld, NV_OP_SAT, res); +      break; +   case TGSI_SAT_MINUS_PLUS_ONE: +      res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f)); +      res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f)); +      break; +   } + +   switch (reg->Register.File) { +   case TGSI_FILE_OUTPUT: +      if (!res->insn) +         res = bld_insn_1(bld, NV_OP_MOV, res); + +      if (bld->pc->is_fragprog) { +         assert(!ptr); +         STORE_OUTP(idx, chan, res); +      } else { +         nvi = new_instruction(bld->pc, NV_OP_EXPORT); +         mem = new_value(bld->pc, bld->ti->output_file, res->reg.size); +         nv_reference(bld->pc, nvi, 0, mem); +         nv_reference(bld->pc, nvi, 1, res); +         if (!ptr) +            mem->reg.address = bld->ti->output_loc[idx][chan]; +         else +            mem->reg.address = 0x80 + idx * 16 + chan * 4; +         nvi->fixed = 1; +      } +      break; +   case TGSI_FILE_TEMPORARY: +      assert(idx < BLD_MAX_TEMPS); +      if (!res->insn) +         res = bld_insn_1(bld, NV_OP_MOV, res); + +      assert(res->reg.file == NV_FILE_GPR); +      assert(res->insn->bb = bld->pc->current_block); + +      if (bld->ti->require_stores) +         bld_lmem_store(bld, ptr, idx * 4 + chan, res); +      else +         STORE_TEMP(idx, chan, res); +      break; +   case TGSI_FILE_ADDRESS: +      assert(idx < BLD_MAX_ADDRS); +      STORE_ADDR(idx, chan, res); +      break; +   } +} 
+ +static INLINE uint32_t +bld_is_output_written(struct bld_context *bld, int i, int c) +{ +   if (c < 0) +      return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32)); +   return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32)); +} + +static void +bld_append_vp_ucp(struct bld_context *bld) +{ +   struct nv_value *res[6]; +   struct nv_value *ucp, *vtx, *out; +   struct nv_instruction *insn; +   int i, c; + +   assert(bld->ti->prog->vp.num_ucps <= 6); + +   for (c = 0; c < 4; ++c) { +      vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]); + +      for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { +         ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4); +         ucp->reg.address = i * 16 + c * 4; + +         if (c == 0) +            res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp); +         else +            res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]); +      } +   } + +   for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { +      (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4; +      (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; +      nv_reference(bld->pc, insn, 0, out); +      nv_reference(bld->pc, insn, 1, res[i]); +   } +} + +static void +bld_export_fp_outputs(struct bld_context *bld) +{ +   struct nv_value *vals[4]; +   struct nv_instruction *nvi; +   int i, c, n; + +   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) { +      if (!bld_is_output_written(bld, i, -1)) +         continue; +      for (n = 0, c = 0; c < 4; ++c) { +         if (!bld_is_output_written(bld, i, c)) +            continue; +         vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]); +         assert(vals[n]); +         vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]); +         vals[n++]->reg.id = bld->ti->output_loc[i][c]; +      } +      assert(n); + +      (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; +      for (c = 0; c < n; ++c) +         nv_reference(bld->pc, nvi, c, vals[c]); +   } +} + 
+static void +bld_new_block(struct bld_context *bld, struct nv_basic_block *b) +{ +   int i, c; + +   bld->pc->current_block = b; + +   for (i = 0; i < 4; ++i) +      bld->saved_addr[i][0] = NULL; +   for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) +      for (c = 0; c < 4; ++c) +         bld->saved_inputs[i][c] = NULL; + +   bld->out_kind = CFG_EDGE_FORWARD; +} + +static struct nv_value * +bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c) +{ +   if (bld->saved_inputs[i][c]) +      return bld->saved_inputs[i][c]; +   return NULL; +} + +static struct nv_value * +bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) +{ +   unsigned cent = mode & NVC0_INTERP_CENTROID; + +   mode &= ~NVC0_INTERP_CENTROID; +    +   if (val->reg.address == 0x3fc) { +      /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */ +      val = bld_insn_1(bld, NV_OP_LINTERP, val); +      val->insn->flat = 1; +      val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31)); +      val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f)); +      return val; +   } else +   if (mode == NVC0_INTERP_PERSPECTIVE) { +      val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]); +   } else { +      val = bld_insn_1(bld, NV_OP_LINTERP, val); +   } + +   val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0; +   val->insn->centroid = cent ? 
1 : 0; +   return val; +} + +static struct nv_value * +emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, +           const unsigned s, const unsigned chan) +{ +   const struct tgsi_full_src_register *src = &insn->Src[s]; +   struct nv_value *res = NULL; +   struct nv_value *ptr = NULL; +   int idx, ind_idx, dim_idx; +   unsigned swz, ind_swz, sgn; + +   idx = src->Register.Index; +   swz = tgsi_util_get_full_src_register_swizzle(src, chan); + +   if (src->Register.Indirect) { +      ind_idx = src->Indirect.Index; +      ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0); + +      ptr = FETCH_ADDR(ind_idx, ind_swz); +   } + +   if (src->Register.Dimension) +      dim_idx = src->Dimension.Index; +   else +      dim_idx = 0; + +   switch (src->Register.File) { +   case TGSI_FILE_CONSTANT: +      assert(dim_idx < 14); +      res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4); +      res->reg.address = idx * 16 + swz * 4; +      res = bld_insn_1(bld, NV_OP_LD, res); +      if (ptr) +         bld_src_pointer(bld, res->insn, 1, ptr); +      break; +   case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */ +      assert(idx < bld->ti->immd32_nr); +      res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]); +      break; +   case TGSI_FILE_INPUT: +      assert(!src->Register.Dimension); +      if (!ptr) { +         res = bld_get_saved_input(bld, idx, swz); +         if (res) +            return res; +      } +      res = new_value(bld->pc, bld->ti->input_file, 4); +      if (ptr) +         res->reg.address = 0x80 + idx * 16 + swz * 4; +      else +         res->reg.address = bld->ti->input_loc[idx][swz]; + +      if (bld->pc->is_fragprog) +         res = bld_interp(bld, bld->ti->interp_mode[idx], res); +      else +         res = bld_insn_1(bld, NV_OP_VFETCH, res); + +      if (ptr) +         bld_src_pointer(bld, res->insn, res->insn->src[1] ? 
2 : 1, ptr); +      else +         bld->saved_inputs[idx][swz] = res; +      break; +   case TGSI_FILE_TEMPORARY: +      if (bld->ti->require_stores) +         res = bld_lmem_load(bld, ptr, idx * 4 + swz); +      else +         res = bld_fetch_global(bld, &bld->tvs[idx][swz]); +      break; +   case TGSI_FILE_ADDRESS: +      res = bld_fetch_global(bld, &bld->avs[idx][swz]); +      break; +   case TGSI_FILE_PREDICATE: +      res = bld_fetch_global(bld, &bld->pvs[idx][swz]); +      break; +   default: +      NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); +      abort(); +      break;	    +   } +   if (!res) +      return bld_undef(bld, NV_FILE_GPR); + +   sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); + +   switch (sgn) { +   case TGSI_UTIL_SIGN_KEEP: +      break; +   case TGSI_UTIL_SIGN_CLEAR: +      res = bld_insn_1(bld, NV_OP_ABS_F32, res); +      break; +   case TGSI_UTIL_SIGN_TOGGLE: +      res = bld_insn_1(bld, NV_OP_NEG_F32, res); +      break; +   case TGSI_UTIL_SIGN_SET: +      res = bld_insn_1(bld, NV_OP_ABS_F32, res); +      res = bld_insn_1(bld, NV_OP_NEG_F32, res); +      break; +   default: +      NOUVEAU_ERR("illegal/unhandled src reg sign mode\n"); +      abort(); +      break; +   } + +   return res; +} + +static void +bld_lit(struct bld_context *bld, struct nv_value *dst0[4], +        const struct tgsi_full_instruction *insn) +{ +   struct nv_value *val0 = NULL; +   unsigned mask = insn->Dst[0].Register.WriteMask; + +   if (mask & ((1 << 0) | (1 << 3))) +      dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f); + +   if (mask & (3 << 1)) { +      val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), bld->zero); +      if (mask & (1 << 1)) +         dst0[1] = val0; +   } + +   if (mask & (1 << 2)) { +      struct nv_value *val1, *val3, *src1, *src3, *pred; +      struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f); +      struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f); + +      
src1 = emit_fetch(bld, insn, 0, 1); +      src3 = emit_fetch(bld, insn, 0, 3); + +      pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero); + +      val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero); +      val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128); +      val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128); +      val3 = bld_pow(bld, val1, val3); + +      dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero); +      bld_src_predicate(bld, dst0[2]->insn, 1, pred); + +      dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]); +   } +} + +static INLINE void +get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg) +{ +   switch (insn->Texture.Texture) { +   case TGSI_TEXTURE_1D: +      *arg = *dim = 1; +      break; +   case TGSI_TEXTURE_SHADOW1D: +      *dim = 1; +      *arg = 2; +      break; +   case TGSI_TEXTURE_UNKNOWN: +   case TGSI_TEXTURE_2D: +   case TGSI_TEXTURE_RECT: +      *arg = *dim = 2; +      break; +   case TGSI_TEXTURE_SHADOW2D: +   case TGSI_TEXTURE_SHADOWRECT: +      *dim = 2; +      *arg = 3; +      break; +   case TGSI_TEXTURE_3D: +   case TGSI_TEXTURE_CUBE: +      *dim = *arg = 3; +      break; +   default: +      assert(0); +      break; +   } +} + +static struct nv_value * +bld_clone(struct bld_context *bld, struct nv_instruction *nvi) +{ +   struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode); +   struct nv_instruction *next, *prev; +   int c; + +   next = dupi->next; +   prev = dupi->prev; + +   *dupi = *nvi; + +   dupi->next = next; +   dupi->prev = prev; + +   for (c = 0; c < 5 && nvi->def[c]; ++c) +      bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c])); + +   for (c = 0; c < 6 && nvi->src[c]; ++c) { +      dupi->src[c] = NULL; +      nv_reference(bld->pc, dupi, c, nvi->src[c]->value); +   } + +   return dupi->def[0]; +} + +/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */ +static void +load_proj_tex_coords(struct bld_context *bld, +                  
   struct nv_value *t[4], int dim, int arg, +                     const struct tgsi_full_instruction *insn) +{ +   int c; +   unsigned mask = (1 << dim) - 1; + +   if (arg != dim) +      mask |= 4; /* depth comparison value */ + +   t[3] = emit_fetch(bld, insn, 0, 3); +   if (t[3]->insn->opcode == NV_OP_PINTERP) { +      t[3] = bld_clone(bld, t[3]->insn); +      t[3]->insn->opcode = NV_OP_LINTERP; +      nv_reference(bld->pc, t[3]->insn, 1, NULL); +   } +   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); + +   for (c = 0; c < 4; ++c) { +      if (!(mask & (1 << c))) +         continue; +      t[c] = emit_fetch(bld, insn, 0, c); + +      if (t[c]->insn->opcode != NV_OP_PINTERP) +         continue; +      mask &= ~(1 << c); + +      t[c] = bld_clone(bld, t[c]->insn); +      nv_reference(bld->pc, t[c]->insn, 1, t[3]); +   } +   if (mask == 0) +      return; + +   t[3] = emit_fetch(bld, insn, 0, 3); +   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); + +   for (c = 0; c < 4; ++c) +      if (mask & (1 << c)) +         t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]); +} + +/* For a quad of threads / top left, top right, bottom left, bottom right + * pixels, do a different operation, and take src0 from a specific thread. 
+ */ +#define QOP_ADD 0 +#define QOP_SUBR 1 +#define QOP_SUB 2 +#define QOP_MOV1 3 + +#define QOP(a, b, c, d) \ +   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6)) + +static INLINE struct nv_value * +bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane, +           struct nv_value *src1, boolean wp) +{ +   struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1); +   val->insn->lanes = lane; +   val->insn->quadop = qop; +   if (wp) { +      assert(!"quadop predicate write"); +   } +   return val; +} + +static struct nv_instruction * +emit_tex(struct bld_context *bld, uint opcode, +         struct nv_value *dst[4], struct nv_value *t_in[4], +         int argc, int tic, int tsc, int cube) +{ +   struct nv_value *t[4]; +   struct nv_instruction *nvi; +   int c; + +   /* the inputs to a tex instruction must be separate values */ +   for (c = 0; c < argc; ++c) { +      t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]); +      t[c]->insn->fixed = 1; +   } + +   nvi = new_instruction(bld->pc, opcode); +   for (c = 0; c < 4; ++c) +      dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4)); +   for (c = 0; c < argc; ++c) +      nv_reference(bld->pc, nvi, c, t[c]); + +   nvi->ext.tex.t = tic; +   nvi->ext.tex.s = tsc; +   nvi->tex_mask = 0xf; +   nvi->tex_cube = cube; +   nvi->tex_live = 0; +   nvi->tex_argc = argc; + +   return nvi; +} + +/* +static boolean +bld_is_constant(struct nv_value *val) +{ +   if (val->reg.file == NV_FILE_IMM) +      return TRUE; +   return val->insn && nvCG_find_constant(val->insn->src[0]); +} +*/ + +static void +bld_tex(struct bld_context *bld, struct nv_value *dst0[4], +        const struct tgsi_full_instruction *insn) +{ +   struct nv_value *t[4], *s[3]; +   uint opcode = translate_opcode(insn->Instruction.Opcode); +   int arg, dim, c; +   const int tic = insn->Src[1].Register.Index; +   const int tsc = tic; +   const int cube = (insn->Texture.Texture  == TGSI_TEXTURE_CUBE) ? 
1 : 0; + +   get_tex_dim(insn, &dim, &arg); + +   if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) +      load_proj_tex_coords(bld, t, dim, arg, insn); +   else { +      for (c = 0; c < dim; ++c) +         t[c] = emit_fetch(bld, insn, 0, c); +      if (arg != dim) +         t[dim] = emit_fetch(bld, insn, 0, 2); +   } + +   if (cube) { +      assert(dim >= 3); +      for (c = 0; c < 3; ++c) +         s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]); + +      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]); +      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]); +      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]); + +      for (c = 0; c < 3; ++c) +         t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]); +   } + +   if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) +      t[arg++] = emit_fetch(bld, insn, 0, 3); +   emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube); +} + +static INLINE struct nv_value * +bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn, +        int n) +{ +   struct nv_value *dotp, *src0, *src1; +   int c; + +   src0 = emit_fetch(bld, insn, 0, 0); +   src1 = emit_fetch(bld, insn, 1, 0); +   dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); + +   for (c = 1; c < n; ++c) { +      src0 = emit_fetch(bld, insn, 0, c); +      src1 = emit_fetch(bld, insn, 1, c); +      dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp); +   } +   return dotp; +} + +#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \ +   for (chan = 0; chan < 4; ++chan)               \ +      if ((inst)->Dst[0].Register.WriteMask & (1 << chan)) + +static void +bld_instruction(struct bld_context *bld, +                const struct tgsi_full_instruction *insn) +{ +   struct nv_value *src0; +   struct nv_value *src1; +   struct nv_value *src2; +   struct nv_value *dst0[4] = { NULL }; +   struct nv_value *temp; +   int c; +   uint opcode = translate_opcode(insn->Instruction.Opcode); +   uint8_t mask = insn->Dst[0].Register.WriteMask; + +#ifdef NOUVEAU_DEBUG +   
debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); +#endif +	 +   switch (insn->Instruction.Opcode) { +   case TGSI_OPCODE_ADD: +   case TGSI_OPCODE_MAX: +   case TGSI_OPCODE_MIN: +   case TGSI_OPCODE_MUL: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         src1 = emit_fetch(bld, insn, 1, c); +         dst0[c] = bld_insn_2(bld, opcode, src0, src1); +      } +      break; +   case TGSI_OPCODE_ARL: +      src1 = bld_imm_u32(bld, 4); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         src0 = bld_insn_1(bld, NV_OP_FLOOR, src0); +         src0->insn->ext.cvt.d = NV_TYPE_S32; +         src0->insn->ext.cvt.s = NV_TYPE_F32; +         dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1); +      } +      break; +   case TGSI_OPCODE_CMP: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero); +         src1 = emit_fetch(bld, insn, 1, c); +         src2 = emit_fetch(bld, insn, 2, c); +         dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0); +      } +      break; +   case TGSI_OPCODE_COS: +   case TGSI_OPCODE_SIN: +      src0 = emit_fetch(bld, insn, 0, 0); +      temp = bld_insn_1(bld, NV_OP_PRESIN, src0); +      if (insn->Dst[0].Register.WriteMask & 7) +         temp = bld_insn_1(bld, opcode, temp); +      for (c = 0; c < 3; ++c) +         if (insn->Dst[0].Register.WriteMask & (1 << c)) +            dst0[c] = temp; +      if (!(insn->Dst[0].Register.WriteMask & (1 << 3))) +         break; +      src0 = emit_fetch(bld, insn, 0, 3); +      temp = bld_insn_1(bld, NV_OP_PRESIN, src0); +      dst0[3] = bld_insn_1(bld, opcode, temp); +      break; +   case TGSI_OPCODE_DP2: +      temp = bld_dot(bld, insn, 2); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = temp; +      break; +   case TGSI_OPCODE_DP3: +      temp = bld_dot(bld, 
insn, 3); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = temp; +      break; +   case TGSI_OPCODE_DP4: +      temp = bld_dot(bld, insn, 4); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = temp; +      break; +   case TGSI_OPCODE_DPH: +      src0 = bld_dot(bld, insn, 3); +      src1 = emit_fetch(bld, insn, 1, 3); +      temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = temp; +      break; +   case TGSI_OPCODE_DST: +      if (insn->Dst[0].Register.WriteMask & 1) +         dst0[0] = bld_imm_f32(bld, 1.0f); +      if (insn->Dst[0].Register.WriteMask & 2) { +         src0 = emit_fetch(bld, insn, 0, 1); +         src1 = emit_fetch(bld, insn, 1, 1); +         dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); +      } +      if (insn->Dst[0].Register.WriteMask & 4) +         dst0[2] = emit_fetch(bld, insn, 0, 2); +      if (insn->Dst[0].Register.WriteMask & 8) +         dst0[3] = emit_fetch(bld, insn, 1, 3); +      break; +   case TGSI_OPCODE_EXP: +      src0 = emit_fetch(bld, insn, 0, 0); +      temp = bld_insn_1(bld, NV_OP_FLOOR, src0); + +      if (insn->Dst[0].Register.WriteMask & 2) +         dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp); +      if (insn->Dst[0].Register.WriteMask & 1) { +         temp = bld_insn_1(bld, NV_OP_PREEX2, temp); +         dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp); +      } +      if (insn->Dst[0].Register.WriteMask & 4) { +         temp = bld_insn_1(bld, NV_OP_PREEX2, src0); +         dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp); +      } +      if (insn->Dst[0].Register.WriteMask & 8) +         dst0[3] = bld_imm_f32(bld, 1.0f); +      break; +   case TGSI_OPCODE_EX2: +      src0 = emit_fetch(bld, insn, 0, 0); +      temp = bld_insn_1(bld, NV_OP_PREEX2, src0); +      temp = bld_insn_1(bld, NV_OP_EX2, temp); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = temp; +      break; +   case TGSI_OPCODE_FRC: +      
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0); +         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]); +      } +      break; +   case TGSI_OPCODE_KIL: +      for (c = 0; c < 4; ++c) +         bld_kil(bld, emit_fetch(bld, insn, 0, c)); +      break; +   case TGSI_OPCODE_KILP: +      (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1; +      break; +   case TGSI_OPCODE_IF: +   { +      struct nv_basic_block *b = new_basic_block(bld->pc); + +      assert(bld->cond_lvl < BLD_MAX_COND_NESTING); + +      nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD); + +      bld->join_bb[bld->cond_lvl] = bld->pc->current_block; +      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; + +      src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, +                      emit_fetch(bld, insn, 0, 0), bld->zero); + +      bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); + +      ++bld->cond_lvl; +      bld_new_block(bld, b); +   } +      break; +   case TGSI_OPCODE_ELSE: +   { +      struct nv_basic_block *b = new_basic_block(bld->pc); + +      --bld->cond_lvl; +      nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); + +      bld->cond_bb[bld->cond_lvl]->exit->target = b; +      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; + +      new_instruction(bld->pc, NV_OP_BRA)->terminator = 1; + +      ++bld->cond_lvl; +      bld_new_block(bld, b); +   } +      break; +   case TGSI_OPCODE_ENDIF: +   { +      struct nv_basic_block *b = new_basic_block(bld->pc); + +      --bld->cond_lvl; +      nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); +      nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); + +      bld->cond_bb[bld->cond_lvl]->exit->target = b; + +      bld_new_block(bld, b); + +      if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) { +         bld->join_bb[bld->cond_lvl]->exit->prev->target = b; +      
   new_instruction(bld->pc, NV_OP_JOIN)->join = 1; +      } +   } +      break; +   case TGSI_OPCODE_BGNLOOP: +   { +      struct nv_basic_block *bl = new_basic_block(bld->pc); +      struct nv_basic_block *bb = new_basic_block(bld->pc); + +      assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING); + +      bld->loop_bb[bld->loop_lvl] = bl; +      bld->brkt_bb[bld->loop_lvl] = bb; + +      nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER); + +      bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]); + +      if (bld->loop_lvl == bld->pc->loop_nesting_bound) +         bld->pc->loop_nesting_bound++; + +      bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl); +      bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl); +      bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl); +   } +      break; +   case TGSI_OPCODE_BRK: +   { +      struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; + +      bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + +      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ +         nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); + +      bld->out_kind = CFG_EDGE_FAKE; +   } +      break; +   case TGSI_OPCODE_CONT: +   { +      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; + +      bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + +      nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + +      if ((bb = bld->join_bb[bld->cond_lvl - 1])) { +         bld->join_bb[bld->cond_lvl - 1] = NULL; +         nvc0_insn_delete(bb->exit->prev); +      } +      bld->out_kind = CFG_EDGE_FAKE; +   } +      break; +   case TGSI_OPCODE_ENDLOOP: +   { +      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; + +      bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + +      nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + +      bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ + +      
bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]); +   } +      break; +   case TGSI_OPCODE_ABS: +   case TGSI_OPCODE_CEIL: +   case TGSI_OPCODE_FLR: +   case TGSI_OPCODE_TRUNC: +   case TGSI_OPCODE_DDX: +   case TGSI_OPCODE_DDY: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         dst0[c] = bld_insn_1(bld, opcode, src0); +      }	    +      break; +   case TGSI_OPCODE_LIT: +      bld_lit(bld, dst0, insn); +      break; +   case TGSI_OPCODE_LRP: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         src1 = emit_fetch(bld, insn, 1, c); +         src2 = emit_fetch(bld, insn, 2, c); +         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); +         dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2); +      } +      break; +   case TGSI_OPCODE_MOV: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = emit_fetch(bld, insn, 0, c); +      break; +   case TGSI_OPCODE_MAD: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         src1 = emit_fetch(bld, insn, 1, c); +         src2 = emit_fetch(bld, insn, 2, c); +         dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2); +      } +      break; +   case TGSI_OPCODE_POW: +      src0 = emit_fetch(bld, insn, 0, 0); +      src1 = emit_fetch(bld, insn, 1, 0); +      temp = bld_pow(bld, src0, src1); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = temp; +      break; +   case TGSI_OPCODE_LOG: +      src0 = emit_fetch(bld, insn, 0, 0); +      src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0); +      temp = bld_insn_1(bld, NV_OP_LG2, src0); +      dst0[2] = temp; +      if (insn->Dst[0].Register.WriteMask & 3) { +         temp = bld_insn_1(bld, NV_OP_FLOOR, temp); +         dst0[0] = temp; +      } +      if (insn->Dst[0].Register.WriteMask & 2) { +         temp = bld_insn_1(bld, NV_OP_PREEX2, temp); +         temp = bld_insn_1(bld, NV_OP_EX2, 
temp); +         temp = bld_insn_1(bld, NV_OP_RCP, temp); +         dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp); +      } +      if (insn->Dst[0].Register.WriteMask & 8) +         dst0[3] = bld_imm_f32(bld, 1.0f); +      break; +   case TGSI_OPCODE_RCP: +   case TGSI_OPCODE_LG2: +      src0 = emit_fetch(bld, insn, 0, 0); +      temp = bld_insn_1(bld, opcode, src0); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = temp; +      break; +   case TGSI_OPCODE_RSQ: +      src0 = emit_fetch(bld, insn, 0, 0); +      temp = bld_insn_1(bld, NV_OP_ABS_F32, src0); +      temp = bld_insn_1(bld, NV_OP_RSQ, temp); +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) +         dst0[c] = temp; +      break; +   case TGSI_OPCODE_SLT: +   case TGSI_OPCODE_SGE: +   case TGSI_OPCODE_SEQ: +   case TGSI_OPCODE_SGT: +   case TGSI_OPCODE_SLE: +   case TGSI_OPCODE_SNE: +   case TGSI_OPCODE_ISLT: +   case TGSI_OPCODE_ISGE: +   case TGSI_OPCODE_USEQ: +   case TGSI_OPCODE_USGE: +   case TGSI_OPCODE_USLT: +   case TGSI_OPCODE_USNE: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         src1 = emit_fetch(bld, insn, 1, c); +         dst0[c] = bld_insn_2(bld, opcode, src0, src1); +         dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode); +      } +      break; +   case TGSI_OPCODE_SCS: +      if (insn->Dst[0].Register.WriteMask & 0x3) { +         src0 = emit_fetch(bld, insn, 0, 0); +         temp = bld_insn_1(bld, NV_OP_PRESIN, src0); +         if (insn->Dst[0].Register.WriteMask & 0x1) +            dst0[0] = bld_insn_1(bld, NV_OP_COS, temp); +         if (insn->Dst[0].Register.WriteMask & 0x2) +            dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp); +      } +      if (insn->Dst[0].Register.WriteMask & 0x4) +         dst0[2] = bld_imm_f32(bld, 0.0f); +      if (insn->Dst[0].Register.WriteMask & 0x8) +         dst0[3] = bld_imm_f32(bld, 1.0f); +      break; +   case TGSI_OPCODE_SSG: +      
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ +         src0 = emit_fetch(bld, insn, 0, c); +         src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero); +         temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000)); +         temp = bld_insn_2(bld, NV_OP_OR,  temp, bld_imm_f32(bld, 1.0f)); +         dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp); +         bld_src_predicate(bld, dst0[c]->insn, 1, src1); +      } +      break; +   case TGSI_OPCODE_SUB: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         src0 = emit_fetch(bld, insn, 0, c); +         src1 = emit_fetch(bld, insn, 1, c); +         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1); +      } +      break; +   case TGSI_OPCODE_TEX: +   case TGSI_OPCODE_TXB: +   case TGSI_OPCODE_TXL: +   case TGSI_OPCODE_TXP: +      bld_tex(bld, dst0, insn); +      break; +   case TGSI_OPCODE_XPD: +      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { +         if (c == 3) { +            dst0[3] = bld_imm_f32(bld, 1.0f); +            break; +         } +         src0 = emit_fetch(bld, insn, 1, (c + 1) % 3); +         src1 = emit_fetch(bld, insn, 0, (c + 2) % 3); +         dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); + +         src0 = emit_fetch(bld, insn, 0, (c + 1) % 3); +         src1 = emit_fetch(bld, insn, 1, (c + 2) % 3); +         dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]); + +         dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG; +      } +      break; +   case TGSI_OPCODE_RET: +      (new_instruction(bld->pc, NV_OP_RET))->fixed = 1; +      break; +   case TGSI_OPCODE_END: +      /* VP outputs are exported in-place as scalars, optimization later */ +      if (bld->pc->is_fragprog) +         bld_export_fp_outputs(bld); +      if (bld->ti->append_ucp) +         bld_append_vp_ucp(bld); +      return; +   default: +      NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode); +      abort(); +      return; +   } + +   if 
(insn->Dst[0].Register.File == TGSI_FILE_OUTPUT && +       !bld->pc->is_fragprog) { +      struct nv_instruction *mi = NULL; +      uint size; + +      if (bld->ti->append_ucp) { +         if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) { +            bld->hpos_index = insn->Dst[0].Register.Index; +            for (c = 0; c < 4; ++c) +               if (mask & (1 << c)) +                  STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]); +         } +      } + +      for (c = 0; c < 4; ++c) +         if ((mask & (1 << c)) && +             ((dst0[c]->reg.file == NV_FILE_IMM) || +              (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) +            dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + +      c = 0; +      if ((mask & 0x3) == 0x3) { +         mask &= ~0x3; +         size = 8; +         mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn; +      } +      if ((mask & 0xc) == 0xc) { +         mask &= ~0xc; +         if (mi) { +            size = 16; +            nv_reference(bld->pc, mi, 2, dst0[2]); +            nv_reference(bld->pc, mi, 3, dst0[3]); +         } else { +            c = 2; +            size = 8; +            mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn; +         } +      } else +      if (mi && (mask & 0x4)) { +         size = 12; +         mask &= ~0x4; +         nv_reference(bld->pc, mi, 2, dst0[2]); +      } + +      if (mi) { +         struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT); +         int s; + +         nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4)); +         nv_reference(bld->pc, ex, 1, mi->def[0]); + +         for (s = 1; s < size / 4; ++s) { +            bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4)); +            nv_reference(bld->pc, ex, s + 1, mi->def[s]); +         } + +         ex->fixed = 1; +         ex->src[0]->value->reg.size = size; +         ex->src[0]->value->reg.address = +            
bld->ti->output_loc[insn->Dst[0].Register.Index][c];
+      }
+   }
+
+   for (c = 0; c < 4; ++c)
+      if (mask & (1 << c))
+         emit_store(bld, insn, c, dst0[c]);
+}
+
+/* Finalize the value arrays of @n registers starting at @base.
+ * Each register occupies four consecutive per-channel entries whose
+ * ->vals member is a util_dynarray.
+ */
+static INLINE void
+bld_free_registers(struct bld_register *base, int n)
+{
+   int i, c;
+
+   for (i = 0; i < n; ++i)
+      for (c = 0; c < 4; ++c)
+         util_dynarray_fini(&base[i * 4 + c].vals);
+}
+
+/* Build the nv_pc IR for the ti->num_insns TGSI instructions in @ti.
+ *
+ * Creates the root basic block of @pc, sets up the constant zero value
+ * (GPR id 63) and, for fragment programs, 1/w in frag_coord[3], then feeds
+ * every instruction to bld_instruction().
+ *
+ * Returns 0 on success, non-zero if the temporary builder context cannot
+ * be allocated (in which case @pc is left untouched).
+ * NOTE(review): callers are assumed to treat any non-zero value as failure.
+ */
+int
+nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti)
+{
+   struct bld_context *bld = CALLOC_STRUCT(bld_context);
+   unsigned ip;
+
+   /* CALLOC_STRUCT can fail; do not dereference a NULL context below */
+   if (!bld)
+      return 1;
+
+   pc->root[0] = pc->current_block = new_basic_block(pc);
+
+   bld->pc = pc;
+   bld->ti = ti;
+
+   pc->loop_nesting_bound = 1;
+
+   bld->zero = new_value(pc, NV_FILE_GPR, 4);
+   bld->zero->reg.id = 63;
+
+   if (pc->is_fragprog) {
+      struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4);
+      mem->reg.address = 0x7c;
+
+      bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem);
+      bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]);
+   }
+
+   for (ip = 0; ip < ti->num_insns; ++ip)
+      bld_instruction(bld, &ti->insns[ip]);
+
+   bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS);
+   bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS);
+   bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS);
+   bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
+
+   FREE(bld);
+   return 0;
+}
+
+/* If a variable is assigned in a loop, replace all references to the value
+ * from outside the loop with a phi value.
+ */
+static void
+bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
+                  struct nv_value *old_val,
+                  struct nv_value *new_val)
+{
+   struct nv_instruction *nvi;
+
+   for (nvi = b->phi ?
b->phi : b->entry; nvi; nvi = nvi->next) { +      int s; +      for (s = 0; s < 6 && nvi->src[s]; ++s) +         if (nvi->src[s]->value == old_val) +            nv_reference(pc, nvi, s, new_val); +   } + +   b->pass_seq = pc->pass_seq; + +   if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq) +      bld_replace_value(pc, b->out[0], old_val, new_val); + +   if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq) +      bld_replace_value(pc, b->out[1], old_val, new_val); +} diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c new file mode 100644 index 0000000000..10d0995a5a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -0,0 +1,367 @@ + +#include "util/u_format.h" + +#include "nvc0_context.h" +#include "nvc0_transfer.h" + +#include "nv50_defs.xml.h" + +struct nvc0_transfer { +   struct pipe_transfer base; +   struct nvc0_m2mf_rect rect[2]; +   uint32_t nblocksx; +   uint32_t nblocksy; +}; + +static void +nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen, +                        const struct nvc0_m2mf_rect *dst, +                        const struct nvc0_m2mf_rect *src, +                        uint32_t nblocksx, uint32_t nblocksy) +{ +   struct nouveau_channel *chan = nouveau_screen(pscreen)->channel; +   const int cpp = dst->cpp; +   uint32_t src_ofst = src->base; +   uint32_t dst_ofst = dst->base; +   uint32_t height = nblocksy; +   uint32_t sy = src->y; +   uint32_t dy = dst->y; +   uint32_t exec = (1 << 20); + +   assert(dst->cpp == src->cpp); + +   if (nouveau_bo_tile_layout(src->bo)) { +      BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5); +      OUT_RING  (chan, src->tile_mode); +      OUT_RING  (chan, src->width * cpp); +      OUT_RING  (chan, src->height); +      OUT_RING  (chan, src->depth); +      OUT_RING  (chan, src->z); +   } else { +      src_ofst += src->y * src->pitch + src->x * cpp; + +      BEGIN_RING(chan, RING_MF(PITCH_IN), 1); +      OUT_RING  (chan, src->width * cpp); + +      exec |= 
NVC0_M2MF_EXEC_LINEAR_IN; +   } + +   if (nouveau_bo_tile_layout(dst->bo)) { +      BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5); +      OUT_RING  (chan, dst->tile_mode); +      OUT_RING  (chan, dst->width * cpp); +      OUT_RING  (chan, dst->height); +      OUT_RING  (chan, dst->depth); +      OUT_RING  (chan, dst->z); +   } else { +      dst_ofst += dst->y * dst->pitch + dst->x * cpp; + +      BEGIN_RING(chan, RING_MF(PITCH_OUT), 1); +      OUT_RING  (chan, dst->width * cpp); + +      exec |= NVC0_M2MF_EXEC_LINEAR_OUT; +   } + +   while (height) { +      int line_count = height > 2047 ? 2047 : height; + +      MARK_RING (chan, 17, 4); + +      BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); +      OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); +      OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); + +      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); +      OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); +      OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); + +      if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) { +         BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2); +         OUT_RING  (chan, src->x * cpp); +         OUT_RING  (chan, sy); +      } else { +         src_ofst += line_count * src->pitch; +      } +      if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) { +         BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2); +         OUT_RING  (chan, dst->x * cpp); +         OUT_RING  (chan, dy); +      } else { +         dst_ofst += line_count * dst->pitch; +      } + +      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); +      OUT_RING  (chan, nblocksx * cpp); +      OUT_RING  (chan, line_count); +      BEGIN_RING(chan, RING_MF(EXEC), 1); +      OUT_RING  (chan, exec); + +      height -= line_count; +      sy += line_count; +      dy += line_count; +   } +} + +void +nvc0_m2mf_push_linear(struct nvc0_context *nvc0, +                      struct nouveau_bo *dst, unsigned domain, int offset, + 
                     unsigned size, void *data) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   uint32_t *src = (uint32_t *)data; +   unsigned count = (size + 3) / 4; + +   MARK_RING (chan, 8, 2); + +   BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); +   OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR); +   OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR); +   BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); +   OUT_RING  (chan, size); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_MF(EXEC), 1); +   OUT_RING  (chan, 0x100111); + +   while (count) { +      unsigned nr = AVAIL_RING(chan); + +      if (nr < 9) { +         FIRE_RING(chan); +         nvc0_make_bo_resident(nvc0, dst, NOUVEAU_BO_WR); +         continue; +      } +      nr = MIN2(count, nr - 1); +      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); +    +      BEGIN_RING_NI(chan, RING_MF(DATA), nr); +      OUT_RINGp (chan, src, nr); + +      src += nr; +      count -= nr; +   } +} + +void +nvc0_m2mf_copy_linear(struct nvc0_context *nvc0, +                      struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, +                      struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, +                      unsigned size) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; + +   while (size) { +      unsigned bytes = MIN2(size, 1 << 17); + +      MARK_RING (chan, 11, 4); + +      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); +      OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); +      OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); +      BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); +      OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); +      OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); +      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); +      OUT_RING  (chan, bytes); +      OUT_RING  (chan, 1); +      BEGIN_RING(chan, RING_MF(EXEC), 1); +      OUT_RING  (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | +                 
NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);
+
+      srcoff += bytes;
+      dstoff += bytes;
+      size -= bytes;
+   }
+}
+
+/* Upload a rectangle of @nblocksx x @nblocksy blocks from the CPU buffer
+ * @data (rows tightly packed, nblocksx * cpp bytes each) into the tiled
+ * destination @dst by pushing the data through the command FIFO.
+ *
+ * Every chunk costs 12 dwords of method headers/arguments in front of the
+ * payload (3 + 3 + 3 + 2 for OFFSET_OUT, TILING_POSITION_OUT, LINE_LENGTH_IN
+ * and EXEC, plus the DATA header), so as many whole rows as fit into the
+ * remaining space are sent per iteration.
+ *
+ * Like the original, the last OUT_RINGp of a chunk may read up to 3 bytes
+ * past the final row when the chunk size is not a multiple of 4 bytes.
+ */
+static void
+nvc0_m2mf_push_rect(struct pipe_screen *pscreen,
+                    const struct nvc0_m2mf_rect *dst,
+                    const void *data,
+                    unsigned nblocksx, unsigned nblocksy)
+{
+   /* was left uninitialized; fetch the channel like nvc0_m2mf_transfer_rect */
+   struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
+   const uint8_t *src = (const uint8_t *)data;
+   const int cpp = dst->cpp;
+   const int line_len = nblocksx * cpp;
+   const int line_words = (line_len + 3) / 4; /* dwords for a single row */
+   int dy = dst->y;
+
+   assert(nouveau_bo_tile_layout(dst->bo));
+   assert(line_words <= NV04_PFIFO_MAX_PACKET_LEN);
+
+   /* nothing to copy, or a row that can never fit into one packet: bail out
+    * instead of dividing by zero / looping forever with line_count == 0
+    */
+   if (line_len <= 0 || line_words > NV04_PFIFO_MAX_PACKET_LEN)
+      return;
+
+   BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
+   OUT_RING  (chan, dst->tile_mode);
+   OUT_RING  (chan, dst->width * cpp);
+   OUT_RING  (chan, dst->height);
+   OUT_RING  (chan, dst->depth);
+   OUT_RING  (chan, dst->z);
+
+   while (nblocksy) {
+      const int avail = AVAIL_RING(chan);
+      int payload, line_count, words;
+
+      /* need the 12 header dwords plus at least one complete row */
+      if (avail < (12 + line_words)) {
+         FIRE_RING(chan);
+         continue;
+      }
+      payload = MIN2(avail - 12, NV04_PFIFO_MAX_PACKET_LEN);
+
+      line_count = (payload * 4) / line_len;
+      if ((unsigned)line_count > nblocksy)
+         line_count = nblocksy; /* don't run past the last row */
+      words = (line_count * line_len + 3) / 4;
+
+      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+      OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+      OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+
+      BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
+      OUT_RING  (chan, dst->x * cpp);
+      OUT_RING  (chan, dy);
+      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+      OUT_RING  (chan, line_len);
+      OUT_RING  (chan, line_count);
+      BEGIN_RING(chan, RING_MF(EXEC), 1);
+      OUT_RING  (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+                 NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN);
+
+      /* all payload dwords go to the same DATA method, hence the
+       * non-incrementing header (as in nvc0_m2mf_push_linear)
+       */
+      BEGIN_RING_NI(chan, RING_MF(DATA), words);
+      OUT_RINGp (chan, src, words);
+
+      dy += line_count;
+      src += line_len * line_count;
+      nblocksy -= line_count;
+   }
+}
+
+/* Create a transfer for the region @box of level/face @sr of miptree @res.
+ *
+ * rect[0] describes the region inside the texture (in units of format
+ * blocks), rect[1] a linear GART staging buffer of nblocksy * stride bytes.
+ * For PIPE_TRANSFER_READ the texture contents are copied into the staging
+ * buffer right away; writes are flushed back in nvc0_miptree_transfer_del().
+ *
+ * Returns NULL if the transfer object or the staging buffer cannot be
+ * allocated.
+ */
+struct pipe_transfer *
+nvc0_miptree_transfer_new(struct pipe_context *pctx,
+                          struct pipe_resource *res,
+                          struct pipe_subresource sr,
+                          unsigned usage,
+                          const struct pipe_box *box)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct pipe_screen *pscreen = pctx->screen;
+   struct nouveau_device *dev = nvc0->screen->base.device;
+   struct nvc0_miptree *mt = nvc0_miptree(res);
+   struct nvc0_miptree_level *lvl = &mt->level[sr.level];
+   struct nvc0_transfer *tx;
+   uint32_t image;
+   uint32_t w, h, z;
+   int ret;
+
+   if (res->target == PIPE_TEXTURE_CUBE)
+      image = sr.face;
+   else
+      image = 0;
+
+   tx = CALLOC_STRUCT(nvc0_transfer);
+   if (!tx)
+      return NULL;
+
+   pipe_resource_reference(&tx->base.resource, res);
+
+   tx->base.sr = sr;
+   tx->base.usage = usage;
+   tx->base.box = *box;
+
+   tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+   tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+
+   tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+
+   w = u_minify(res->width0, sr.level);
+   h = u_minify(res->height0, sr.level);
+
+   tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format);
+
+   tx->rect[0].bo = mt->base.bo;
+   tx->rect[0].base = lvl->image_offset[image];
+   tx->rect[0].tile_mode = lvl->tile_mode;
+   tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
+   /* vertical quantities must be converted with the block *height* */
+   tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
+   tx->rect[0].z = box->z;
+   tx->rect[0].width = util_format_get_nblocksx(res->format, w);
+   tx->rect[0].height = util_format_get_nblocksy(res->format, h);
+   tx->rect[0].depth = res->depth0;
+   tx->rect[0].pitch = lvl->pitch;
+   tx->rect[0].domain = NOUVEAU_BO_VRAM;
+
+   if (!(usage & PIPE_TRANSFER_READ) &&
+       (res->depth0 == 1) && (tx->nblocksy * tx->base.stride < 512 * 4)) {
+      /* don't allocate scratch buffer, upload through FIFO */
+   }
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+                        tx->nblocksy * tx->base.stride, &tx->rect[1].bo);
+   if (ret) {
+      /* drop the reference taken above, otherwise the resource leaks */
+      pipe_resource_reference(&tx->base.resource, NULL);
+      FREE(tx);
+      return NULL;
+   }
+
+   tx->rect[1].width = tx->nblocksx;
+   tx->rect[1].height = tx->nblocksy;
+   tx->rect[1].depth = box->depth;
+   tx->rect[1].pitch = tx->base.stride;
+   tx->rect[1].domain = NOUVEAU_BO_GART;
+
+   /* NOTE(review): rect[1] is not advanced per slice, so for box->depth > 1
+    * every slice appears to land on the same staging location -- confirm.
+    */
+   if (usage & PIPE_TRANSFER_READ) {
+      for (z = 0; z < box->depth; ++z) {
+         nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
+                                 tx->nblocksx, tx->nblocksy);
+         tx->rect[0].z++;
+      }
+   }
+   tx->rect[0].z = box->z; /* rewind for the write-back in transfer_del */
+
+   return &tx->base;
+}
+
+/* Destroy @transfer: for PIPE_TRANSFER_WRITE copy the staging buffer back
+ * into the texture slice by slice, then release the staging bo, the resource
+ * reference and the transfer object itself.
+ */
+void
+nvc0_miptree_transfer_del(struct pipe_context *pctx,
+                          struct pipe_transfer *transfer)
+{
+   struct pipe_screen *pscreen = pctx->screen;
+   struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+   unsigned z;
+
+   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+      for (z = 0; z < tx->base.box.depth; ++z) {
+         nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1],
+                                 tx->nblocksx, tx->nblocksy);
+         tx->rect[0].z++;
+      }
+   }
+
+   nouveau_bo_ref(NULL, &tx->rect[1].bo);
+   pipe_resource_reference(&transfer->resource, NULL);
+
+   FREE(tx);
+}
+
+/* Map the staging buffer of @transfer for CPU access and return its address,
+ * or NULL if nouveau_bo_map() fails. An already mapped buffer is returned
+ * as is.
+ */
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pctx,
+                          struct pipe_transfer *transfer)
+{
+   struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+   int ret;
+   unsigned flags = 0;
+
+   if (tx->rect[1].bo->map)
+      return tx->rect[1].bo->map;
+
+   if (transfer->usage & PIPE_TRANSFER_READ)
+      flags = NOUVEAU_BO_RD;
+   if (transfer->usage & PIPE_TRANSFER_WRITE)
+      flags |= NOUVEAU_BO_WR;
+
+   ret = nouveau_bo_map(tx->rect[1].bo, flags);
+   if (ret)
+      return NULL;
+   return tx->rect[1].bo->map;
+}
+
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+  
                          struct pipe_transfer *transfer) +{ +   struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + +   nouveau_bo_unmap(tx->rect[1].bo); +} + diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h new file mode 100644 index 0000000000..aaebe408b5 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_transfer.h @@ -0,0 +1,38 @@ + +#ifndef __NVC0_TRANSFER_H__ +#define __NVC0_TRANSFER_H__ + +#include "pipe/p_state.h" + +struct pipe_transfer * +nvc0_miptree_transfer_new(struct pipe_context *pcontext, +                          struct pipe_resource *pt, +                          struct pipe_subresource sr, +                          unsigned usage, +                          const struct pipe_box *box); +void +nvc0_miptree_transfer_del(struct pipe_context *pcontext, +                          struct pipe_transfer *ptx); +void * +nvc0_miptree_transfer_map(struct pipe_context *pcontext, +                          struct pipe_transfer *ptx); +void +nvc0_miptree_transfer_unmap(struct pipe_context *pcontext, +                            struct pipe_transfer *ptx); + +struct nvc0_m2mf_rect { +   struct nouveau_bo *bo; +   uint32_t base; +   unsigned domain; +   uint32_t pitch; +   uint32_t width; +   uint32_t x; +   uint32_t height; +   uint32_t y; +   uint16_t depth; +   uint16_t z; +   uint16_t tile_mode; +   uint16_t cpp; +}; + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c new file mode 100644 index 0000000000..f1d5910e3d --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -0,0 +1,556 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or 
sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+/* Destroy a vertex state object made by nvc0_vertex_state_create():
+ * release its translate object (if any) and free the memory.
+ */
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe,
+                         void *hwcso)
+{
+   struct nvc0_vertex_stateobj *so = hwcso;
+
+   if (so->translate)
+      so->translate->release(so->translate);
+   FREE(hwcso);
+}
+
+/* Create the vertex state object for @num_elements vertex elements.
+ *
+ * For each element the hardware attribute format is looked up in
+ * nvc0_format_table; a format without a hardware encoding is replaced by the
+ * 32-bit float format with the same number of components. Elements without
+ * an instance divisor are added to the translate key used for the push path,
+ * the others are recorded in so->instance_bits.
+ *
+ * Returns the new state object, or NULL on allocation failure or if a format
+ * has an unsupported number of components.
+ */
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+    struct nvc0_vertex_stateobj *so;
+    struct translate_key transkey;
+    unsigned i;
+
+    assert(num_elements);
+
+    /* the struct already contains one element, hence num_elements - 1 */
+    so = MALLOC(sizeof(*so) +
+                (num_elements - 1) * sizeof(struct nvc0_vertex_element));
+    if (!so)
+        return NULL;
+    so->num_elements = num_elements;
+    so->instance_bits = 0;
+
+    transkey.nr_elements = 0;
+    transkey.output_stride = 0;
+
+    for (i = 0; i < num_elements; ++i) {
+        const struct pipe_vertex_element *ve = &elements[i];
+        const unsigned vbi = ve->vertex_buffer_index;
+        enum pipe_format fmt = ve->src_format;
+
+        so->element[i].pipe = elements[i];
+        so->element[i].state = nvc0_format_table[fmt].vtx;
+
+        if (!so->element[i].state) {
+            /* no hw encoding: let translate convert to float */
+            switch (util_format_get_nr_components(fmt)) {
+            case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
+            case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
+            case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
+            case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
+            default:
+                assert(0);
+                FREE(so); /* was leaked on this error path */
+                return NULL;
+            }
+            so->element[i].state = nvc0_format_table[fmt].vtx;
+        }
+        so->element[i].state |= i;
+
+        if (likely(!ve->instance_divisor)) {
+            unsigned j = transkey.nr_elements++;
+
+            transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
+            transkey.element[j].input_format = ve->src_format;
+            transkey.element[j].input_buffer = vbi;
+            transkey.element[j].input_offset = ve->src_offset;
+            transkey.element[j].instance_divisor = ve->instance_divisor;
+
+            transkey.element[j].output_format = fmt;
+            transkey.element[j].output_offset = transkey.output_stride;
+            /* keep every attribute dword aligned in the output vertex */
+            transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
+        } else {
+           so->instance_bits |= 1 << i;
+        }
+    }
+
+    so->translate = translate_create(&transkey);
+    so->vtx_size = transkey.output_stride / 4;
+    so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1);
+
+    return so;
+}
+
+/* parenthesized so the value stays intact inside larger expressions */
+#define NVC0_3D_VERTEX_ATTRIB_INACTIVE                                       \
+   (NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |                                \
+    NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST)
+
+void
+nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nvc0_vertex_stateobj *vertex =
nvc0->vertex; +   struct pipe_vertex_buffer *vb; +   struct nvc0_vertex_element *ve; +   unsigned i; + +   nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + +   nvc0->vbo_fifo = 0; + +   BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); +   for (i = 0; i < vertex->num_elements; ++i) { +      ve = &vertex->element[i]; +      vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; + +      if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { +         if (nvc0->vbo_push_hint) { +            nvc0->vbo_fifo |= 1 << i; +         } else { +            nvc0_migrate_vertices(nvc0_resource(vb->buffer), +                                  vb->buffer_offset, +                                  vb->buffer->width0 - vb->buffer_offset); +            nvc0->vbo_dirty = TRUE; +         } +      } + +      if (1 || likely(vb->stride)) { +         OUT_RING(chan, ve->state); +      } else { +         OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST); +      } +   } + +   for (i = 0; i < vertex->num_elements; ++i) { +      struct nvc0_resource *res; +      unsigned size, offset; +       +      ve = &vertex->element[i]; +      vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; + +      if (nvc0->vbo_fifo || (0 && vb->stride == 0)) { +#if 0 +         if (!nvc0->vbo_fifo) +            nvc0_vbo_constant_attrib(nvc0, vb, ve); +#endif +         BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); +         OUT_RING  (chan, 0); +         continue; +      } + +      res = nvc0_resource(vb->buffer); +      size = vb->buffer->width0; +      offset = ve->pipe.src_offset + vb->buffer_offset; + +      if (unlikely(ve->pipe.instance_divisor)) { +         if (!(nvc0->state.instance_bits & (1 << i))) { +            INLIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); +         } +         BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1); +         OUT_RING  (chan, ve->pipe.instance_divisor); +      } else +      if (unlikely(nvc0->state.instance_bits & (1 << i))) { +    
     INLIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0); +      } + +      nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, res, NOUVEAU_BO_RD); + +      BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); +      OUT_RING  (chan, (1 << 12) | vb->stride); +      BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); +      OUT_RING  (chan, i); +      OUT_RESRCh(chan, res, size, NOUVEAU_BO_RD); +      OUT_RESRCl(chan, res, size, NOUVEAU_BO_RD); +      OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); +      OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); +   } +   for (; i < nvc0->state.num_vtxelts; ++i) { +      BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1); +      OUT_RING  (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE); +      BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); +      OUT_RING  (chan, 0); +   } + +   nvc0->state.num_vtxelts = vertex->num_elements; +   nvc0->state.instance_bits = vertex->instance_bits; +} + +#define NVC0_PRIM_GL_CASE(n) \ +   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ +   switch (prim) { +   NVC0_PRIM_GL_CASE(POINTS); +   NVC0_PRIM_GL_CASE(LINES); +   NVC0_PRIM_GL_CASE(LINE_LOOP); +   NVC0_PRIM_GL_CASE(LINE_STRIP); +   NVC0_PRIM_GL_CASE(TRIANGLES); +   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); +   NVC0_PRIM_GL_CASE(TRIANGLE_FAN); +   NVC0_PRIM_GL_CASE(QUADS); +   NVC0_PRIM_GL_CASE(QUAD_STRIP); +   NVC0_PRIM_GL_CASE(POLYGON); +   NVC0_PRIM_GL_CASE(LINES_ADJACENCY); +   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); +   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); +   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); +   /* +   NVC0_PRIM_GL_CASE(PATCHES); */ +   default: +      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; +      break; +   } +} + +static void +nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) +{ +   struct nvc0_context *nvc0 = chan->user_private; + +   nvc0_bufctx_emit_relocs(nvc0); +} + +#if 0 +static struct nouveau_bo * +nvc0_tfb_setup(struct 
nvc0_context *nvc0) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   struct nouveau_bo *tfb = NULL; +   int ret, i; + +   ret = nouveau_bo_new(nvc0->screen->base.device, +                        NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb); +   if (ret) +      return NULL; + +   ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR); +   if (ret) +      return NULL; +   memset(tfb->map, 0xee, 8 * 4 * 3); +   nouveau_bo_unmap(tfb); + +   BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5); +   OUT_RING  (chan, 1); +   OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); +   OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); +   OUT_RING  (chan, tfb->size); +   OUT_RING  (chan, 0); /* TFB_PRIMITIVE_ID(0) */ +   BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3); +   OUT_RING  (chan, 0); +   OUT_RING  (chan, 8); /* TFB_VARYING_COUNT(0) */ +   OUT_RING  (chan, 32); /* TFB_BUFFER_STRIDE(0) */ +   BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2); +   OUT_RING  (chan, 0x1f1e1d1c); +   OUT_RING  (chan, 0xa3a2a1a0); +   for (i = 1; i < 4; ++i) { +      BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1); +      OUT_RING  (chan, 0); +   } +   BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D_(0x135c), 1); +   OUT_RING  (chan, 1); +   BEGIN_RING(chan, RING_3D_(0x135c), 1); +   OUT_RING  (chan, 0); + +   return tfb; +} +#endif + +static void +nvc0_draw_arrays(struct nvc0_context *nvc0, +                 unsigned mode, unsigned start, unsigned count, +                 unsigned instance_count) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; +   unsigned prim; + +   chan->flush_notify = nvc0_draw_vbo_flush_notify; +   chan->user_private = nvc0; + +   prim = nvc0_prim_gl(mode); + +   while (instance_count--) { +      BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); +      OUT_RING  (chan, prim); +      BEGIN_RING(chan, 
RING_3D(VERTEX_BUFFER_FIRST), 2);
+      OUT_RING  (chan, start);
+      OUT_RING  (chan, count);
+      INLIN_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+      prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+   }
+
+   chan->flush_notify = NULL;
+}
+
+/* Push @count 8-bit indices, starting at map[start], through the FIFO.
+ * The first (count & 3) indices are sent one per dword via VB_ELEMENT_U32,
+ * the rest packed four per dword via VB_ELEMENT_U8.
+ */
+static void
+nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
+                              unsigned start, unsigned count)
+{
+   map += start;
+
+   if (count & 3) {
+      unsigned i;
+      /* up to 3 dwords for the same method: the header must not increment
+       * the method address (matches the other index packets in this file)
+       */
+      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3);
+      for (i = 0; i < (count & 3); ++i)
+         OUT_RING(chan, *map++);
+      count &= ~3;
+   }
+   while (count) {
+      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
+
+      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);
+      for (i = 0; i < nr; ++i) {
+         /* cast first: map[3] would be promoted to (signed) int and
+          * shifting a value >= 0x80 by 24 overflows it
+          */
+         OUT_RING(chan,
+                  ((uint32_t)map[3] << 24) | (map[2] << 16) | (map[1] << 8) |
+                  map[0]);
+         map += 4;
+      }
+      count -= nr * 4;
+   }
+}
+
+/* Push @count 16-bit indices, starting at map[start], through the FIFO.
+ * An odd leading index is sent via VB_ELEMENT_U32, the rest packed two per
+ * dword via VB_ELEMENT_U16.
+ */
+static void
+nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,
+                              unsigned start, unsigned count)
+{
+   map += start;
+
+   if (count & 1) {
+      count &= ~1;
+      BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+      OUT_RING  (chan, *map++);
+   }
+   while (count) {
+      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+      for (i = 0; i < nr; ++i) {
+         /* cast first: uint16_t promotes to int, << 16 could overflow */
+         OUT_RING(chan, ((uint32_t)map[1] << 16) | map[0]);
+         map += 2;
+      }
+      count -= nr * 2;
+   }
+}
+
+/* Push @count 32-bit indices, starting at map[start], unmodified via
+ * VB_ELEMENT_U32, at most NV04_PFIFO_MAX_PACKET_LEN per packet.
+ */
+static void
+nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,
+                              unsigned start, unsigned count)
+{
+   map += start;
+
+   while (count) {
+      const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
+
+      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr);
+      OUT_RINGp    (chan, map, nr);
+
+      map += nr;
+      count -= nr;
+   }
+}
+
+/* Push @count 32-bit indices, starting at map[start], packed two per dword
+ * via VB_ELEMENT_U16 (an odd leading index goes through VB_ELEMENT_U32).
+ * Only the low 16 bits of an index survive the packing; nvc0_draw_vbo()
+ * requests this path only when max_index (and the restart index) fit.
+ */
+static void
+nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map,
+                                    unsigned start, unsigned count)
+{
+   map += start;
+
+   if (count & 1) {
+      count--;
+      BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+      OUT_RING  (chan, *map++);
+   }
+   while (count) {
+      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+      for (i = 0; i < nr; ++i) {
+         OUT_RING(chan, (map[1] << 16) | map[0]);
+         map += 2;
+      }
+      count -= nr * 2;
+   }
+}
+
+/* Emit an indexed draw of @count indices starting at @start, repeated
+ * @instance_count times.
+ *
+ * If the index buffer is mapped by the GPU it is referenced directly via
+ * INDEX_ARRAY_*; otherwise it is mapped on the CPU and the indices are
+ * pushed inline through the FIFO (@shorten selects 16-bit packing of 32-bit
+ * indices). While commands are being emitted, chan->flush_notify is set so
+ * that buffer relocations are re-emitted on a flush; it is cleared again on
+ * every exit path.
+ */
+static void
+nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+                   unsigned mode, unsigned start, unsigned count,
+                   unsigned instance_count, int32_t index_bias)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   void *data;
+   struct pipe_transfer *transfer;
+   unsigned prim;
+   unsigned index_size = nvc0->idxbuf.index_size;
+
+   chan->flush_notify = nvc0_draw_vbo_flush_notify;
+   chan->user_private = nvc0;
+
+   prim = nvc0_prim_gl(mode);
+
+   if (index_bias != nvc0->state.index_bias) {
+      BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1);
+      OUT_RING  (chan, index_bias);
+      nvc0->state.index_bias = index_bias;
+   }
+
+   if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) {
+      struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
+      unsigned offset = nvc0->idxbuf.offset;
+      unsigned limit = nvc0->idxbuf.buffer->width0 - 1;
+
+      /* the value written after the limit encodes the index size as
+       * 0/1/2 for 1/2/4 byte indices
+       */
+      if (index_size == 4)
+         index_size = 2;
+      else
+      if (index_size == 2)
+         index_size = 1;
+
+      while (instance_count--) {
+         MARK_RING (chan, 11, 4);
+         BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+         /* use the translated hw primitive, like every other draw path */
+         OUT_RING  (chan, prim);
+         BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7);
+         OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
+         OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
+         OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD);
+         OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD);
+         OUT_RING  (chan, index_size);
+         OUT_RING  (chan, start);
+         OUT_RING  (chan, count);
+         INLIN_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+         prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+      }
+   } else {
+      data = pipe_buffer_map(&nvc0->pipe, nvc0->idxbuf.buffer,
+                             PIPE_TRANSFER_READ, &transfer);
+      if (!data)
+         goto out;
+      data = (uint8_t *)data + nvc0->idxbuf.offset;
+
+      while (instance_count--) {
+         BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+         OUT_RING  (chan, prim);
+         switch (index_size) {
+         case 1:
+            nvc0_draw_elements_inline_u08(chan, data, start, count);
+            break;
+         case 2:
+            nvc0_draw_elements_inline_u16(chan, data, start, count);
+            break;
+         case 4:
+            if (shorten)
+               nvc0_draw_elements_inline_u32_short(chan, data, start, count);
+            else
+               nvc0_draw_elements_inline_u32(chan, data, start, count);
+            break;
+         default:
+            assert(0);
+            goto out_unmap;
+         }
+         INLIN_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+         prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+      }
+
+out_unmap:
+      /* the transfer created by pipe_buffer_map() was never released */
+      nvc0->pipe.transfer_unmap(&nvc0->pipe, transfer);
+      nvc0->pipe.transfer_destroy(&nvc0->pipe, transfer);
+   }
+
+out:
+   chan->flush_notify = NULL;
+}
+
+void
+nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+   /* For picking only a few vertices from a large user buffer, push is better,
+    * if index count is larger and we expect repeated vertices, suggest upload.
+    */ +   nvc0->vbo_push_hint = /* the 64 is heuristic */ +      !(info->indexed && +        ((info->max_index - info->min_index + 64) < info->count)); + +   nvc0_state_validate(nvc0); + +   if (nvc0->state.instance_base != info->start_instance) { +      nvc0->state.instance_base = info->start_instance; +      BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1); +      OUT_RING  (chan, info->start_instance); +   } + +   if (nvc0->vbo_fifo) { +      nvc0_push_vbo(nvc0, info); +      return; +   } + +   if (nvc0->vbo_dirty) { +      BEGIN_RING(chan, RING_3D_(0x142c), 1); +      OUT_RING  (chan, 0); +      nvc0->vbo_dirty = FALSE; +   } + +   if (!info->indexed) { +      nvc0_draw_arrays(nvc0, +                       info->mode, info->start, info->count, +                       info->instance_count); +   } else { +      boolean shorten = info->max_index <= 65535; + +      assert(nvc0->idxbuf.buffer); + +      if (info->primitive_restart != nvc0->state.prim_restart) { +         if (info->primitive_restart) { +            BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2); +            OUT_RING  (chan, 1); +            OUT_RING  (chan, info->restart_index); + +            if (info->restart_index > 65535) +               shorten = FALSE; +         } else { +            INLIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0); +         } +         nvc0->state.prim_restart = info->primitive_restart; +      } else +      if (info->primitive_restart) { +         BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1); +         OUT_RING  (chan, info->restart_index); +      } + +      nvc0_draw_elements(nvc0, shorten, +                         info->mode, info->start, info->count, +                         info->instance_count, info->index_bias); +   } +} diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h new file mode 100644 index 0000000000..34bc536765 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -0,0 +1,189 @@ + +#ifndef 
__NVC0_WINSYS_H__ +#define __NVC0_WINSYS_H__ + +#include <stdint.h> +#include <unistd.h> +#include "pipe/p_defines.h" + +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_device.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_reloc.h" + +#include "nvc0_resource.h" /* OUT_RESRC */ + +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + +#define NVC0_SUBCH_3D 1 +#define NVC0_SUBCH_2D 2 +#define NVC0_SUBCH_MF 3 + +#define NVC0_MF_(n) NVC0_M2MF_##n + +#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2)) +#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2)) +#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2)) + +#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2)) +#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2)) +#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2)) + +#define RING_GR(gr, m) (((gr)->subc << 13) | ((m) >> 2)) + +int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); + +static inline uint32_t +nouveau_bo_tile_layout(struct nouveau_bo *bo) +{ +   return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK; +} + +static INLINE void +WAIT_RING(struct nouveau_channel *chan, unsigned size) +{ +   if (chan->cur + size > chan->end) +      nouveau_pushbuf_flush(chan, size); +} + +static INLINE void +OUT_RING(struct nouveau_channel *chan, uint32_t data) +{ +   *(chan->cur++) = (data); +} + +/* incremental methods */ +static INLINE void +BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ +   WAIT_RING(chan, size + 1); +   OUT_RING (chan, (0x2 << 28) | (size << 16) | mthd); +} + +/* non-incremental */ +static INLINE void +BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ +   WAIT_RING(chan, size + 1); +   OUT_RING (chan, (0x6 << 28) | (size << 16) | mthd); +} + +/* increment-once */ +static INLINE void +BEGIN_RING_1I(struct 
nouveau_channel *chan, uint32_t mthd, unsigned size) +{ +   WAIT_RING(chan, size + 1); +   OUT_RING (chan, (0xa << 28) | (size << 16) | mthd); +} + +/* inline-data */ +static INLINE void +INLIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data) +{ +   WAIT_RING(chan, 1); +   OUT_RING (chan, (0x8 << 28) | (data << 16) | mthd); +} + +int +nouveau_pushbuf_marker_emit(struct nouveau_channel *chan, +                            unsigned wait_dwords, unsigned wait_relocs); +int +nouveau_pushbuf_emit_reloc(struct nouveau_channel *, void *ptr, +                           struct nouveau_bo *, uint32_t data, uint32_t data2, +                           uint32_t flags, uint32_t vor, uint32_t tor); +int +nouveau_pushbuf_submit(struct nouveau_channel *chan, struct nouveau_bo *bo, +                       unsigned offset, unsigned length); + +static INLINE int +MARK_RING(struct nouveau_channel *chan, unsigned dwords, unsigned relocs) +{ +   return nouveau_pushbuf_marker_emit(chan, dwords, relocs); +} + +static INLINE void +OUT_RINGf(struct nouveau_channel *chan, float data) +{ +   union { uint32_t i; float f; } u; +   u.f = data; +   OUT_RING(chan, u.i); +} + +static INLINE unsigned +AVAIL_RING(struct nouveau_channel *chan) +{ +   return chan->end - chan->cur; +} + +static INLINE void +OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned size) +{ +   memcpy(chan->cur, data, size * 4); +   chan->cur += size; +} + +static INLINE int +OUT_RELOC(struct nouveau_channel *chan, struct nouveau_bo *bo, +          unsigned data, unsigned flags, unsigned vor, unsigned tor) +{ +   return nouveau_pushbuf_emit_reloc(chan, chan->cur++, bo, +                                     data, 0, flags, vor, tor); +} + +static INLINE int +OUT_RELOCl(struct nouveau_channel *chan, struct nouveau_bo *bo, +           unsigned delta, unsigned flags) +{ +   return OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_LOW, 0, 0); +} + +static INLINE int +OUT_RELOCh(struct nouveau_channel *chan, 
struct nouveau_bo *bo, +           unsigned delta, unsigned flags) +{ +   return OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_HIGH, 0, 0); +} + +static INLINE int +OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res, +           unsigned delta, unsigned flags) +{ +   return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE int +OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, +           unsigned delta, unsigned flags) +{ +   return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE void +FIRE_RING(struct nouveau_channel *chan) +{ +   nouveau_pushbuf_flush(chan, 0); +} + +static INLINE void +BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s) +{ +   struct nouveau_subchannel *subc = &gr->channel->subc[s]; + +   assert(s < 8); +   if (subc->gr) { +      assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT); +      subc->gr->bound = NOUVEAU_GROBJ_UNBOUND; +   } +   subc->gr = gr; +   subc->gr->subc = s; +   subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT; + +   BEGIN_RING(chan, RING_GR(gr, 0x0000), 1); +   OUT_RING  (chan, gr->grclass); +} + +#endif  | 
