diff options
Diffstat (limited to 'src/gallium/docs/source')
-rw-r--r-- | src/gallium/docs/source/conf.py | 197 | ||||
-rw-r--r-- | src/gallium/docs/source/context.rst | 367 | ||||
-rw-r--r-- | src/gallium/docs/source/cso.rst | 14 | ||||
-rw-r--r-- | src/gallium/docs/source/cso/blend.rst | 55 | ||||
-rw-r--r-- | src/gallium/docs/source/cso/dsa.rst | 61 | ||||
-rw-r--r-- | src/gallium/docs/source/cso/rasterizer.rst | 188 | ||||
-rw-r--r-- | src/gallium/docs/source/cso/sampler.rst | 109 | ||||
-rw-r--r-- | src/gallium/docs/source/cso/shader.rst | 12 | ||||
-rw-r--r-- | src/gallium/docs/source/cso/velems.rst | 24 | ||||
-rw-r--r-- | src/gallium/docs/source/distro.rst | 187 | ||||
-rw-r--r-- | src/gallium/docs/source/exts/tgsi.py | 17 | ||||
-rw-r--r-- | src/gallium/docs/source/glossary.rst | 27 | ||||
-rw-r--r-- | src/gallium/docs/source/index.rst | 28 | ||||
-rw-r--r-- | src/gallium/docs/source/intro.rst | 9 | ||||
-rw-r--r-- | src/gallium/docs/source/screen.rst | 279 | ||||
-rw-r--r-- | src/gallium/docs/source/tgsi.rst | 1474 |
16 files changed, 3048 insertions, 0 deletions
diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py new file mode 100644 index 0000000000..ccc84405c4 --- /dev/null +++ b/src/gallium/docs/source/conf.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# +# Gallium documentation build configuration file, created by +# sphinx-quickstart on Sun Dec 20 14:09:05 2009. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.append(os.path.abspath('exts')) + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.pngmath', 'tgsi'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Gallium' +copyright = u'2009, VMWare, X.org, Nouveau' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.4' +# The full version, including alpha/beta/rc tags. +release = '0.4' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# The language for highlighting source code. +highlight_language = 'c' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_use_modindex = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Galliumdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'Gallium.tex', u'Gallium Documentation', + u'VMWare, X.org, Nouveau', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_use_modindex = True diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst new file mode 100644 index 0000000000..4e35a4c408 --- /dev/null +++ b/src/gallium/docs/source/context.rst @@ -0,0 +1,367 @@ +.. _context: + +Context +======= + +The context object represents the purest, most directly accessible, abilities +of the device's 3D rendering pipeline. + +Methods +------- + +CSO State +^^^^^^^^^ + +All CSO state is created, bound, and destroyed, with triplets of methods that +all follow a specific naming scheme. For example, ``create_blend_state``, +``bind_blend_state``, and ``destroy_blend_state``. + +CSO objects handled by the context object: + +* :ref:`Blend`: ``*_blend_state`` +* :ref:`Sampler`: These are special; they can be bound to either vertex or + fragment samplers, and they are bound in groups. + ``bind_fragment_sampler_states``, ``bind_vertex_sampler_states`` +* :ref:`Rasterizer`: ``*_rasterizer_state`` +* :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state`` +* :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for + fragment shaders, and ``*_vs_state`` is for vertex shaders. +* :ref:`Vertex Elements`: ``*_vertex_elements_state`` + + +Resource Binding State +^^^^^^^^^^^^^^^^^^^^^^ + +This state describes how resources in various flavours (textures, +buffers, surfaces) are bound to the driver. + + +* ``set_constant_buffer`` sets a constant buffer to be used for a given shader + type. index is used to indicate which buffer to set (some apis may allow + multiple ones to be set, and binding a specific one later, though drivers + are mostly restricted to the first one right now). + +* ``set_framebuffer_state`` + +* ``set_vertex_buffers`` + + +Non-CSO State +^^^^^^^^^^^^^ + +These pieces of state are too small, variable, and/or trivial to have CSO +objects. They all follow simple, one-method binding calls, e.g. +``set_blend_color``. + +* ``set_stencil_ref`` sets the stencil front and back reference values + which are used as comparison values in stencil test. +* ``set_blend_color`` +* ``set_sample_mask`` +* ``set_clip_state`` +* ``set_polygon_stipple`` +* ``set_scissor_state`` sets the bounds for the scissor test, which culls + pixels before blending to render targets. If the :ref:`Rasterizer` does + not have the scissor test enabled, then the scissor bounds never need to + be set since they will not be used. +* ``set_viewport_state`` + + +Sampler Views +^^^^^^^^^^^^^ + +These are the means to bind textures to shader stages. To create one, specify +its format, swizzle and LOD range in sampler view template. + +If texture format is different than template format, it is said the texture +is being cast to another format. Casting can be done only between compatible +formats, that is formats that have matching component order and sizes. + +Swizzle fields specify they way in which fetched texel components are placed +in the result register. For example, ``swizzle_r`` specifies what is going to be +placed in first component of result register. + +The ``first_level`` and ``last_level`` fields of sampler view template specify +the LOD range the texture is going to be constrained to. + +* ``set_fragment_sampler_views`` binds an array of sampler views to + fragment shader stage. Every binding point acquires a reference + to a respective sampler view and releases a reference to the previous + sampler view. + +* ``set_vertex_sampler_views`` binds an array of sampler views to vertex + shader stage. Every binding point acquires a reference to a respective + sampler view and releases a reference to the previous sampler view. + +* ``create_sampler_view`` creates a new sampler view. ``texture`` is associated + with the sampler view which results in sampler view holding a reference + to the texture. Format specified in template must be compatible + with texture format. + +* ``sampler_view_destroy`` destroys a sampler view and releases its reference + to associated texture. + + +Clearing +^^^^^^^^ + +Clear is one of the most difficult concepts to nail down to a single +interface (due to both different requirements from APIs and also driver/hw +specific differences). + +``clear`` initializes some or all of the surfaces currently bound to +the framebuffer to particular RGBA, depth, or stencil values. +Currently, this does not take into account color or stencil write masks (as +used by GL), and always clears the whole surfaces (no scissoring as used by +GL clear or explicit rectangles like d3d9 uses). It can, however, also clear +only depth or stencil in a combined depth/stencil surface, if the driver +supports PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE. +If a surface includes several layers/slices (XXX: not yet...) then all layers +will be cleared. + +``clear_render_target`` clears a single color rendertarget with the specified +color value. While it is only possible to clear one surface at a time (which can +include several layers), this surface need not be bound to the framebuffer. + +``clear_depth_stencil`` clears a single depth, stencil or depth/stencil surface +with the specified depth and stencil values (for combined depth/stencil buffers, +is is also possible to only clear one or the other part). While it is only +possible to clear one surface at a time (which can include several layers), +this surface need not be bound to the framebuffer. + + +Drawing +^^^^^^^ + +``draw_arrays`` draws a specified primitive. + +This command is equivalent to calling ``draw_arrays_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. + +``draw_elements`` draws a specified primitive using an optional +index buffer. + +This command is equivalent to calling ``draw_elements_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. + +``draw_range_elements`` + +XXX: this is (probably) a temporary entrypoint, as the range +information should be available from the vertex_buffer state. +Using this to quickly evaluate a specialized path in the draw +module. + +``draw_arrays_instanced`` draws multiple instances of the same primitive. + +This command is equivalent to calling ``draw_elements_instanced`` +with ``indexBuffer`` set to NULL and ``indexSize`` set to 0. + +``draw_elements_instanced`` draws multiple instances of the same primitive +using an optional index buffer. + +For instanceID in the range between ``startInstance`` +and ``startInstance``+``instanceCount``-1, inclusive, draw a primitive +specified by ``mode`` and sequential numbers in the range between ``start`` +and ``start``+``count``-1, inclusive. + +If ``indexBuffer`` is not NULL, it specifies an index buffer with index +byte size of ``indexSize``. The sequential numbers are used to lookup +the index buffer and the resulting indices in turn are used to fetch +vertex attributes. + +If ``indexBuffer`` is NULL, the sequential numbers are used directly +as indices to fetch vertex attributes. + +``indexBias`` is a value which is added to every index read from the index +buffer before fetching vertex attributes. + +``minIndex`` and ``maxIndex`` describe minimum and maximum index contained in +the index buffer. + +If a given vertex element has ``instance_divisor`` set to 0, it is said +it contains per-vertex data and effective vertex attribute address needs +to be recalculated for every index. + + attribAddr = ``stride`` * index + ``src_offset`` + +If a given vertex element has ``instance_divisor`` set to non-zero, +it is said it contains per-instance data and effective vertex attribute +address needs to recalculated for every ``instance_divisor``-th instance. + + attribAddr = ``stride`` * instanceID / ``instance_divisor`` + ``src_offset`` + +In the above formulas, ``src_offset`` is taken from the given vertex element +and ``stride`` is taken from a vertex buffer associated with the given +vertex element. + +The calculated attribAddr is used as an offset into the vertex buffer to +fetch the attribute data. + +The value of ``instanceID`` can be read in a vertex shader through a system +value register declared with INSTANCEID semantic name. + + +Queries +^^^^^^^ + +Queries gather some statistic from the 3D pipeline over one or more +draws. Queries may be nested, though no state tracker currently +exercises this. + +Queries can be created with ``create_query`` and deleted with +``destroy_query``. To start a query, use ``begin_query``, and when finished, +use ``end_query`` to end the query. + +``get_query_result`` is used to retrieve the results of a query. If +the ``wait`` parameter is TRUE, then the ``get_query_result`` call +will block until the results of the query are ready (and TRUE will be +returned). Otherwise, if the ``wait`` parameter is FALSE, the call +will not block and the return value will be TRUE if the query has +completed or FALSE otherwise. + +The most common type of query is the occlusion query, +``PIPE_QUERY_OCCLUSION_COUNTER``, which counts the number of fragments which +are written to the framebuffer without being culled by +:ref:`Depth, Stencil, & Alpha` testing or shader KILL instructions. + +Another type of query, ``PIPE_QUERY_TIME_ELAPSED``, returns the amount of +time, in nanoseconds, the context takes to perform operations. + +Gallium does not guarantee the availability of any query types; one must +always check the capabilities of the :ref:`Screen` first. + + +Conditional Rendering +^^^^^^^^^^^^^^^^^^^^^ + +A drawing command can be skipped depending on the outcome of a query +(typically an occlusion query). The ``render_condition`` function specifies +the query which should be checked prior to rendering anything. + +If ``render_condition`` is called with ``query`` = NULL, conditional +rendering is disabled and drawing takes place normally. + +If ``render_condition`` is called with a non-null ``query`` subsequent +drawing commands will be predicated on the outcome of the query. If +the query result is zero subsequent drawing commands will be skipped. + +If ``mode`` is PIPE_RENDER_COND_WAIT the driver will wait for the +query to complete before deciding whether to render. + +If ``mode`` is PIPE_RENDER_COND_NO_WAIT and the query has not yet +completed, the drawing command will be executed normally. If the query +has completed, drawing will be predicated on the outcome of the query. + +If ``mode`` is PIPE_RENDER_COND_BY_REGION_WAIT or +PIPE_RENDER_COND_BY_REGION_NO_WAIT rendering will be predicated as above +for the non-REGION modes but in the case that an occulusion query returns +a non-zero result, regions which were occluded may be ommitted by subsequent +drawing commands. This can result in better performance with some GPUs. +Normally, if the occlusion query returned a non-zero result subsequent +drawing happens normally so fragments may be generated, shaded and +processed even where they're known to be obscured. + + +Flushing +^^^^^^^^ + +``flush`` + + +Resource Busy Queries +^^^^^^^^^^^^^^^^^^^^^ + +``is_resource_referenced`` + + + +Blitting +^^^^^^^^ + +These methods emulate classic blitter controls. + +These methods operate directly on ``pipe_resource`` objects, and stand +apart from any 3D state in the context. Blitting functionality may be +moved to a separate abstraction at some point in the future. + +``resource_copy_region`` blits a region of a subresource of a resource to a +region of another subresource of a resource, provided that both resources have the +same format. The source and destination may be the same resource, but overlapping +blits are not permitted. + +``resource_resolve`` resolves a multisampled resource into a non-multisampled +one. Formats and dimensions must match. This function must be present if a driver +supports multisampling. + +The interfaces to these calls are likely to change to make it easier +for a driver to batch multiple blits with the same source and +destination. + + +Stream Output +^^^^^^^^^^^^^ + +Stream output, also known as transform feedback allows writing the results of the +vertex pipeline (after the geometry shader or vertex shader if no geometry shader +is present) to be written to a buffer created with a ``PIPE_BIND_STREAM_OUTPUT`` +flag. + +First a stream output state needs to be created with the +``create_stream_output_state`` call. It specific the details of what's being written, +to which buffer and with what kind of a writemask. + +Then target buffers needs to be set with the call to ``set_stream_output_buffers`` +which sets the buffers and the offsets from the start of those buffer to where +the data will be written to. + + +Transfers +^^^^^^^^^ + +These methods are used to get data to/from a resource. + +``get_transfer`` creates a transfer object. + +``transfer_destroy`` destroys the transfer object. May cause +data to be written to the resource at this point. + +``transfer_map`` creates a memory mapping for the transfer object. +The returned map points to the start of the mapped range according to +the box region, not the beginning of the resource. + +``transfer_unmap`` remove the memory mapping for the transfer object. +Any pointers into the map should be considered invalid and discarded. + +``transfer_inline_write`` performs a simplified transfer for simple writes. +Basically get_transfer, transfer_map, data write, transfer_unmap, and +transfer_destroy all in one. + +.. _transfer_flush_region: + +transfer_flush_region +%%%%%%%%%%%%%%%%%%%%% + +If a transfer was created with ``FLUSH_EXPLICIT``, it will not automatically +be flushed on write or unmap. Flushes must be requested with +``transfer_flush_region``. Flush ranges are relative to the mapped range, not +the beginning of the resource. + +.. _pipe_transfer: + +PIPE_TRANSFER +^^^^^^^^^^^^^ + +These flags control the behavior of a transfer object. + +* ``READ``: resource contents are read at transfer create time. +* ``WRITE``: resource contents will be written back at transfer destroy time. +* ``MAP_DIRECTLY``: a transfer should directly map the resource. May return + NULL if not supported. +* ``DISCARD``: The memory within the mapped region is discarded. + Cannot be used with ``READ``. +* ``DONTBLOCK``: Fail if the resource cannot be mapped immediately. +* ``UNSYNCHRONIZED``: Do not synchronize pending operations on the resource + when mapping. The interaction of any writes to the map and any + operations pending on the resource are undefined. Cannot be used with + ``READ``. +* ``FLUSH_EXPLICIT``: Written ranges will be notified later with + :ref:`transfer_flush_region`. Cannot be used with ``READ``. diff --git a/src/gallium/docs/source/cso.rst b/src/gallium/docs/source/cso.rst new file mode 100644 index 0000000000..dab1ee50f3 --- /dev/null +++ b/src/gallium/docs/source/cso.rst @@ -0,0 +1,14 @@ +CSO +=== + +CSO, Constant State Objects, are a core part of Gallium's API. + +CSO work on the principle of reusable state; they are created by filling +out a state object with the desired properties, then passing that object +to a context. The context returns an opaque context-specific handle which +can be bound at any time for the desired effect. + +.. toctree:: + :glob: + + cso/* diff --git a/src/gallium/docs/source/cso/blend.rst b/src/gallium/docs/source/cso/blend.rst new file mode 100644 index 0000000000..c74396284c --- /dev/null +++ b/src/gallium/docs/source/cso/blend.rst @@ -0,0 +1,55 @@ +.. _blend: + +Blend +===== + +This state controls blending of the final fragments into the target rendering +buffers. + +Blend Factors +------------- + +The blend factors largely follow the same pattern as their counterparts +in other modern and legacy drawing APIs. + +XXX blurb about dual-source blends + +Members +------- + +independent_blend_enable + If enabled, blend state is different for each render target, and + for each render target set in the respective member of the rt array. + If disabled, blend state is the same for all render targets, and only + the first member of the rt array contains valid data. +logicop_enable + Enables logic ops. Cannot be enabled at the same time as blending, and + is always the same for all render targets. +logicop_func + The logic operation to use if logic ops are enabled. One of PIPE_LOGICOP. +dither + Whether dithering is enabled. Note: Dithering is implementation-dependent. +rt + Contains the per-rendertarget blend state. + +Per-rendertarget Members +------------------------ + +blend_enable + If blending is enabled, perform a blend calculation according to blend + functions and source/destination factors. Otherwise, the incoming fragment + color gets passed unmodified (but colormask still applies). +rgb_func + The blend function to use for rgb channels. One of PIPE_BLEND. +rgb_src_factor + The blend source factor to use for rgb channels. One of PIPE_BLENDFACTOR. +rgb_dst_factor + The blend destination factor to use for rgb channels. One of PIPE_BLENDFACTOR. +alpha_func + The blend function to use for the alpha channel. One of PIPE_BLEND. +alpha_src_factor + The blend source factor to use for the alpha channel. One of PIPE_BLENDFACTOR. +alpha_dst_factor + The blend destination factor to use for alpha channel. One of PIPE_BLENDFACTOR. +colormask + Bitmask of which channels to write. Combination of PIPE_MASK bits. diff --git a/src/gallium/docs/source/cso/dsa.rst b/src/gallium/docs/source/cso/dsa.rst new file mode 100644 index 0000000000..1bbe381f9e --- /dev/null +++ b/src/gallium/docs/source/cso/dsa.rst @@ -0,0 +1,61 @@ +.. _depth,stencil,&alpha: + +Depth, Stencil, & Alpha +======================= + +These three states control the depth, stencil, and alpha tests, used to +discard fragments that have passed through the fragment shader. + +Traditionally, these three tests have been clumped together in hardware, so +they are all stored in one structure. + +During actual execution, the order of operations done on fragments is always: + +* Alpha +* Stencil +* Depth + +Depth Members +------------- + +enabled + Whether the depth test is enabled. +writemask + Whether the depth buffer receives depth writes. +func + The depth test function. One of PIPE_FUNC. + +Stencil Members +--------------- + +enabled + Whether the stencil test is enabled. For the second stencil, whether the + two-sided stencil is enabled. If two-sided stencil is disabled, the other + fields for the second array member are not valid. +func + The stencil test function. One of PIPE_FUNC. +valuemask + Stencil test value mask; this is ANDed with the value in the stencil + buffer and the reference value before doing the stencil comparison test. +writemask + Stencil test writemask; this controls which bits of the stencil buffer + are written. +fail_op + The operation to carry out if the stencil test fails. One of + PIPE_STENCIL_OP. +zfail_op + The operation to carry out if the stencil test passes but the depth test + fails. One of PIPE_STENCIL_OP. +zpass_op + The operation to carry out if the stencil test and depth test both pass. + One of PIPE_STENCIL_OP. + +Alpha Members +------------- + +enabled + Whether the alpha test is enabled. +func + The alpha test function. One of PIPE_FUNC. +ref_value + Alpha test reference value; used for certain functions. diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst new file mode 100644 index 0000000000..ad1612f93e --- /dev/null +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -0,0 +1,188 @@ +.. _rasterizer: + +Rasterizer +========== + +The rasterizer state controls the rendering of points, lines and triangles. +Attributes include polygon culling state, line width, line stipple, +multisample state, scissoring and flat/smooth shading. + +Shading +------- + +flatshade +^^^^^^^^^ + +If set, the provoking vertex of each polygon is used to determine the color +of the entire polygon. If not set, fragment colors will be interpolated +between the vertex colors. + +The actual interpolated shading algorithm is obviously +implementation-dependent, but will usually be Gourard for most hardware. + +.. note:: + + This is separate from the fragment shader input attributes + CONSTANT, LINEAR and PERSPECTIVE. The flatshade state is needed at + clipping time to determine how to set the color of new vertices. + + :ref:`Draw` can implement flat shading by copying the provoking vertex + color to all the other vertices in the primitive. + +flatshade_first +^^^^^^^^^^^^^^^ + +Whether the first vertex should be the provoking vertex, for most primitives. +If not set, the last vertex is the provoking vertex. + +There are several important exceptions to the specification of this rule. + +* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first + vertex. If the caller wishes to change the provoking vertex, they merely + need to rotate the vertices themselves. +* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no + effect; the provoking vertex is always the last vertex. +* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the + second vertex, not the first. This permits each segment of the fan to have + a different color. + +Polygons +-------- + +light_twoside +^^^^^^^^^^^^^ + +If set, there are per-vertex back-facing colors. The hardware +(perhaps assisted by :ref:`Draw`) should be set up to use this state +along with the front/back information to set the final vertex colors +prior to rasterization. + +The frontface vertex shader color output is marked with TGSI semantic +COLOR[0], and backface COLOR[1]. + +front_ccw + Indicates whether the window order of front-facing polygons is + counter-clockwise (TRUE) or clockwise (FALSE). + +cull_mode + Indicates which faces of polygons to cull, either PIPE_FACE_NONE + (cull no polygons), PIPE_FACE_FRONT (cull front-facing polygons), + PIPE_FACE_BACK (cull back-facing polygons), or + PIPE_FACE_FRONT_AND_BACK (cull all polygons). + +fill_front + Indicates how to fill front-facing polygons, either + PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE or + PIPE_POLYGON_MODE_POINT. +fill_back + Indicates how to fill back-facing polygons, either + PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE or + PIPE_POLYGON_MODE_POINT. + +poly_stipple_enable + Whether polygon stippling is enabled. +poly_smooth + Controls OpenGL-style polygon smoothing/antialiasing + +offset_point + If set, point-filled polygons will have polygon offset factors applied +offset_line + If set, line-filled polygons will have polygon offset factors applied +offset_tri + If set, filled polygons will have polygon offset factors applied + +offset_units + Specifies the polygon offset bias +offset_scale + Specifies the polygon offset scale + + + +Lines +----- + +line_width + The width of lines. +line_smooth + Whether lines should be smoothed. Line smoothing is simply anti-aliasing. +line_stipple_enable + Whether line stippling is enabled. +line_stipple_pattern + 16-bit bitfield of on/off flags, used to pattern the line stipple. +line_stipple_factor + When drawing a stippled line, each bit in the stipple pattern is + repeated N times, where N = line_stipple_factor + 1. +line_last_pixel + Controls whether the last pixel in a line is drawn or not. OpenGL + omits the last pixel to avoid double-drawing pixels at the ends of lines + when drawing connected lines. + + +Points +------ + +sprite_coord_enable +^^^^^^^^^^^^^^^^^^^ + +Specifies if a texture unit has its texture coordinates replaced or not. This +is a packed bitfield containing the enable for all texcoords -- if all bits +are zero, point sprites are effectively disabled. If any bit is set, then +point_smooth and point_quad_rasterization are ignored; point smoothing is +disabled and points are always rasterized as quads. If enabled, the four +vertices of the resulting quad will be assigned texture coordinates, +according to sprite_coord_mode. + +sprite_coord_mode +^^^^^^^^^^^^^^^^^ + +Specifies how the value for each shader output should be computed when drawing +point sprites. For PIPE_SPRITE_COORD_LOWER_LEFT, the lower-left vertex will +have coordinates (0,0,0,1). For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left +vertex will have coordinates (0,0,0,1). +This state is used by :ref:`Draw` to generate texcoords. + +.. note:: + + When geometry shaders are available, a special geometry shader could be + used instead of this functionality, to convert incoming points into quads + with the proper texture coordinates. + +point_quad_rasterization +^^^^^^^^^^^^^^^^^^^^^^^^ + +Determines if points should be rasterized as quads or points. Certain APIs, +like Direct3D, always use quad rasterization for points, regardless of +whether point sprites are enabled or not. If this state is enabled, point +smoothing and antialiasing are disabled. If it is disabled, point sprite +coordinates are not generated. + +.. note:: + + Some renderers always internally translate points into quads; this state + still affects those renderers by overriding other rasterization state. + +point_smooth + Whether points should be smoothed. Point smoothing turns rectangular + points into circles or ovals. +point_size_per_vertex + Whether the vertex shader is expected to have a point size output. + Undefined behaviour is permitted if there is disagreement between + this flag and the actual bound shader. +point_size + The size of points, if not specified per-vertex. + + + +Other Members +------------- + +scissor + Whether the scissor test is enabled. + +multisample + Whether :term:`MSAA` is enabled. + +gl_rasterization_rules + Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set, + the rasterizer will use (0, 0) for pixel centers. + diff --git a/src/gallium/docs/source/cso/sampler.rst b/src/gallium/docs/source/cso/sampler.rst new file mode 100644 index 0000000000..9bbb784de8 --- /dev/null +++ b/src/gallium/docs/source/cso/sampler.rst @@ -0,0 +1,109 @@ +.. _sampler: + +Sampler +======= + +Texture units have many options for selecting texels from loaded textures; +this state controls an individual texture unit's texel-sampling settings. + +Texture coordinates are always treated as four-dimensional, and referred to +with the traditional (S, T, R, Q) notation. + +Members +------- + +wrap_s + How to wrap the S coordinate. One of PIPE_TEX_WRAP_*. +wrap_t + How to wrap the T coordinate. One of PIPE_TEX_WRAP_*. +wrap_r + How to wrap the R coordinate. One of PIPE_TEX_WRAP_*. + +The wrap modes are: + +* ``PIPE_TEX_WRAP_REPEAT``: Standard coord repeat/wrap-around mode. +* ``PIPE_TEX_WRAP_CLAMP_TO_EDGE``: Clamp coord to edge of texture, the border + color is never sampled. +* ``PIPE_TEX_WRAP_CLAMP_TO_BORDER``: Clamp coord to border of texture, the + border color is sampled when coords go outside the range [0,1]. +* ``PIPE_TEX_WRAP_CLAMP``: The coord is clamped to the range [0,1] before + scaling to the texture size. This corresponds to the legacy OpenGL GL_CLAMP + texture wrap mode. Historically, this mode hasn't acted consistantly across + all graphics hardware. It sometimes acts like CLAMP_TO_EDGE or + CLAMP_TO_BORDER. The behaviour may also vary depending on linear vs. + nearest sampling mode. +* ``PIPE_TEX_WRAP_MIRROR_REPEAT``: If the integer part of the coordinate + is odd, the coord becomes (1 - coord). Then, normal texture REPEAT is + applied to the coord. +* ``PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE``: First, the absolute value of the + coordinate is computed. Then, regular CLAMP_TO_EDGE is applied to the coord. +* ``PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER``: First, the absolute value of the + coordinate is computed. Then, regular CLAMP_TO_BORDER is applied to the + coord. +* ``PIPE_TEX_WRAP_MIRROR_CLAMP``: First, the absolute value of the coord is + computed. Then, regular CLAMP is applied to the coord. + + +min_img_filter + The image filter to use when minifying texels. One of PIPE_TEX_FILTER_*. +mag_img_filter + The image filter to use when magnifying texels. One of PIPE_TEX_FILTER_*. + +The texture image filter modes are: + +* ``PIPE_TEX_FILTER_NEAREST``: One texel is fetched from the texture image + at the texture coordinate. +* ``PIPE_TEX_FILTER_LINEAR``: Two, four or eight texels (depending on the + texture dimensions; 1D/2D/3D) are fetched from the texture image and + linearly weighted and blended together. + +min_mip_filter + The filter to use when minifying mipmapped textures. One of + PIPE_TEX_MIPFILTER_*. + +The texture mip filter modes are: + +* ``PIPE_TEX_MIPFILTER_NEAREST``: A single mipmap level/image is selected + according to the texture LOD (lambda) value. +* ``PIPE_TEX_MIPFILTER_LINEAR``: The two mipmap levels/images above/below + the texture LOD value are sampled from. The results of sampling from + those two images are blended together with linear interpolation. +* ``PIPE_TEX_MIPFILTER_NONE``: Mipmap filtering is disabled. All texels + are taken from the level 0 image. + + +compare_mode + If set to PIPE_TEX_COMPARE_R_TO_TEXTURE, the result of texture sampling + is not a color but a true/false value which is the result of comparing the + sampled texture value (typically a Z value from a depth texture) to the + texture coordinate's R component. + If set to PIPE_TEX_COMPARE_NONE, no comparison calculation is performed. +compare_func + The inequality operator used when compare_mode=1. One of PIPE_FUNC_x. +normalized_coords + If set, the incoming texture coordinates (nominally in the range [0,1]) + will be scaled by the texture width, height, depth to compute texel + addresses. Otherwise, the texture coords are used as-is (they are not + scaled by the texture dimensions). + When normalized_coords=0, only a subset of the texture wrap modes are + allowed: PIPE_TEX_WRAP_CLAMP, PIPE_TEX_WRAP_CLAMP_TO_EDGE and + PIPE_TEX_WRAP_CLAMP_TO_BORDER. +lod_bias + Bias factor which is added to the computed level of detail. + The normal level of detail is computed from the partial derivatives of + the texture coordinates and/or the fragment shader TEX/TXB/TXL + instruction. +min_lod + Minimum level of detail, used to clamp LOD after bias. The LOD values + correspond to mipmap levels where LOD=0 is the level 0 mipmap image. +max_lod + Maximum level of detail, used to clamp LOD after bias. +border_color + RGBA color used for texel coordinates that are outside the [0,width-1], + [0, height-1] or [0, depth-1] ranges. +max_anisotropy + Maximum anistropy ratio to use when sampling from textures. For example, + if max_anistropy=4, a region of up to 1 by 4 texels will be sampled. + Set to zero to disable anisotropic filtering. Any other setting enables + anisotropic filtering, however it's not unexpected some drivers only will + change their filtering with a setting of 2 and higher. diff --git a/src/gallium/docs/source/cso/shader.rst b/src/gallium/docs/source/cso/shader.rst new file mode 100644 index 0000000000..0ee42c8787 --- /dev/null +++ b/src/gallium/docs/source/cso/shader.rst @@ -0,0 +1,12 @@ +.. _shader: + +Shader +====== + +One of the two types of shaders supported by Gallium. + +Members +------- + +tokens + A list of tgsi_tokens. diff --git a/src/gallium/docs/source/cso/velems.rst b/src/gallium/docs/source/cso/velems.rst new file mode 100644 index 0000000000..92cde014fb --- /dev/null +++ b/src/gallium/docs/source/cso/velems.rst @@ -0,0 +1,24 @@ +.. _vertexelements: + +Vertex Elements +=============== + +This state controls format etc. of the input attributes contained +in the pipe_vertex_buffer(s). There's one pipe_vertex_element array member +for each input attribute. + +Members +------- + +src_offset + The byte offset of the attribute in the buffer given by + vertex_buffer_index for the first vertex. +instance_divisor + The instance data rate divisor, used for instancing. + 0 means this is per-vertex data, n means per-instance data used for + n consecutive instances (n > 0). +vertex_buffer_index + The vertex buffer this attribute lives in. Several attributes may + live in the same vertex buffer. +src_format + The format of the attribute data. One of the PIPE_FORMAT tokens. diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst new file mode 100644 index 0000000000..6ba5a056f4 --- /dev/null +++ b/src/gallium/docs/source/distro.rst @@ -0,0 +1,187 @@ +Distribution +============ + +Along with the interface definitions, the following drivers, state trackers, +and auxiliary modules are shipped in the standard Gallium distribution. + +Drivers +------- + +Cell +^^^^ + +Simple driver for the IBM Cell architecture. Runs faster than :ref:`softpipe` +on Cell-based machines. + +Failover +^^^^^^^^ + +Broken and deprecated. + +Intel i915 +^^^^^^^^^^ + +Driver for Intel i915 and i945 chipsets. + +Intel i965 +^^^^^^^^^^ + +Highly experimental driver for Intel i965 chipsets. + +Identity +^^^^^^^^ + +Wrapper driver. The identity driver is a simple skeleton that passes through +all of its :ref:`Context` and :ref:`Screen` methods to an underlying Context +and Screen, and as such, it is an excellent starting point for new drivers. + +LLVM Softpipe +^^^^^^^^^^^^^ + +A version of :ref:`softpipe` that uses the Low-Level Virtual Machine to +dynamically generate optimized rasterizing pipelines. + +nVidia nvfx +^^^^^^^^^^^ + +Driver for the nVidia nv30 and nv40 families of GPUs. + +nVidia nv50 +^^^^^^^^^^^ + +Driver for the nVidia nv50 family of GPUs. + +VMWare SVGA +^^^^^^^^^^^ + +Driver for VMWare virtualized guest operating system graphics processing. + +ATI r300 +^^^^^^^^ + +Driver for the ATI/AMD r300, r400, and r500 families of GPUs. + +.. _softpipe: + +Softpipe +^^^^^^^^ + +Reference software rasterizer. Slow but accurate. + +Trace +^^^^^ + +Wrapper driver. Trace dumps an XML record of the calls made to the +:ref:`Context` and :ref:`Screen` objects that it wraps. + +State Trackers +-------------- + +.. _dri: + +Direct Rendering Infrastructure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Tracker that implements the client-side DRI protocol, for providing direct +acceleration services to X11 servers with the DRI extension. Supports DRI1 +and DRI2. Only GL is supported. + +.. _egl: + +EGL +^^^ + +Tracker for the Khronos EGL standard, used to set up GL and GLES contexts +without extra knowledge of the underlying windowing system. + +GLX +^^^ + +MesaGL +^^^^^^ + +Tracker implementing a GL state machine. Not usable as a standalone tracker; +Mesa should be built with another state tracker, such as :ref:`DRI` or +:ref:`EGL`. + +Python +^^^^^^ + +OpenVG +^^^^^^ + +WGL +^^^ + +Xorg/XFree86 DDX +^^^^^^^^^^^^^^^^ + +Tracker for XFree86 and Xorg X11 servers. Provides device-dependent +modesetting and acceleration as a DDX driver. + +Auxiliary +--------- + +OS +^^ + +The OS module contains the abstractions for basic operating system services: + +* memory allocation +* simple message logging +* obtaining run-time configuration option +* threading primitives + +This is the bare minimum required to port Gallium to a new platform. + +The OS module already provides the implementations of these abstractions for +the most common platforms. When targeting an embedded platform no +implementation will be provided -- these must be provided separately. + +CSO Cache +^^^^^^^^^ + +The CSO cache is used to accelerate preparation of state by saving +driver-specific state structures for later use. + +.. _draw: + +Draw +^^^^ + +Draw is a software :term:`TCL` pipeline for hardware that lacks vertex shaders +or other essential parts of pre-rasterization vertex preparation. + +Gallivm +^^^^^^^ + +Indices +^^^^^^^ + +Indices provides tools for translating or generating element indices for +use with element-based rendering. + +Pipe Buffer Managers +^^^^^^^^^^^^^^^^^^^^ + +Each of these managers provides various services to drivers that are not +fully utilizing a memory manager. + +Remote Debugger +^^^^^^^^^^^^^^^ + +Runtime Assembly Emission +^^^^^^^^^^^^^^^^^^^^^^^^^ + +TGSI +^^^^ + +The TGSI auxiliary module provides basic utilities for manipulating TGSI +streams. + +Translate +^^^^^^^^^ + +Util +^^^^ + diff --git a/src/gallium/docs/source/exts/tgsi.py b/src/gallium/docs/source/exts/tgsi.py new file mode 100644 index 0000000000..e92cd5c4d1 --- /dev/null +++ b/src/gallium/docs/source/exts/tgsi.py @@ -0,0 +1,17 @@ +# tgsi.py +# Sphinx extension providing formatting for TGSI opcodes +# (c) Corbin Simpson 2010 + +import docutils.nodes +import sphinx.addnodes + +def parse_opcode(env, sig, signode): + opcode, desc = sig.split("-", 1) + opcode = opcode.strip().upper() + desc = " (%s)" % desc.strip() + signode += sphinx.addnodes.desc_name(opcode, opcode) + signode += sphinx.addnodes.desc_annotation(desc, desc) + return opcode + +def setup(app): + app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)", parse_opcode) diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst new file mode 100644 index 0000000000..acde56eafc --- /dev/null +++ b/src/gallium/docs/source/glossary.rst @@ -0,0 +1,27 @@ +Glossary +======== + +.. glossary:: + :sorted: + + MSAA + Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes + multiple samples of the depth buffer, and uses this information to + smooth the edges of polygons. + + TCL + Transform, Clipping, & Lighting. The three stages of preparation in a + rasterizing pipeline prior to the actual rasterization of vertices into + fragments. + + NPOT + Non-power-of-two. Usually applied to textures which have at least one + dimension which is not a power of two. + + LOD + Level of Detail. Also spelled "LoD." The value that determines when the + switches between mipmaps occur during texture sampling. + + GLSL + GL Shading Language. The official, common high-level shader language used + in GL 2.0 and above. diff --git a/src/gallium/docs/source/index.rst b/src/gallium/docs/source/index.rst new file mode 100644 index 0000000000..54bc883fce --- /dev/null +++ b/src/gallium/docs/source/index.rst @@ -0,0 +1,28 @@ +.. Gallium documentation master file, created by + sphinx-quickstart on Sun Dec 20 14:09:05 2009. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Gallium's documentation! +=================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + intro + tgsi + screen + context + cso + distro + glossary + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/src/gallium/docs/source/intro.rst b/src/gallium/docs/source/intro.rst new file mode 100644 index 0000000000..1ea103840a --- /dev/null +++ b/src/gallium/docs/source/intro.rst @@ -0,0 +1,9 @@ +Introduction +============ + +What is Gallium? +---------------- + +Gallium is essentially an API for writing graphics drivers in a largely +device-agnostic fashion. It provides several objects which encapsulate the +core services of graphics hardware in a straightforward manner. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst new file mode 100644 index 0000000000..48d9d570b6 --- /dev/null +++ b/src/gallium/docs/source/screen.rst @@ -0,0 +1,279 @@ +.. _screen: + +Screen +====== + +A screen is an object representing the context-independent part of a device. + +Flags and enumerations +---------------------- + +XXX some of these don't belong in this section. + + +.. _pipe_cap: + +PIPE_CAP_* +^^^^^^^^^^ + +Capability queries return information about the features and limits of the +driver/GPU. For floating-point values, use :ref:`get_paramf`, and for boolean +or integer values, use :ref:`get_param`. + +The integer capabilities: + +* ``MAX_TEXTURE_IMAGE_UNITS``: The maximum number of samplers available. +* ``NPOT_TEXTURES``: Whether :term:`NPOT` textures may have repeat modes, + normalized coordinates, and mipmaps. +* ``TWO_SIDED_STENCIL``: Whether the stencil test can also affect back-facing + polygons. +* ``GLSL``: Deprecated. +* ``DUAL_SOURCE_BLEND``: Whether dual-source blend factors are supported. See + :ref:`Blend` for more information. +* ``ANISOTROPIC_FILTER``: Whether textures can be filtered anisotropically. +* ``POINT_SPRITE``: Whether point sprites are available. +* ``MAX_RENDER_TARGETS``: The maximum number of render targets that may be + bound. +* ``OCCLUSION_QUERY``: Whether occlusion queries are available. +* ``TIMER_QUERY``: Whether timer queries are available. +* ``TEXTURE_SHADOW_MAP``: XXX +* ``MAX_TEXTURE_2D_LEVELS``: The maximum number of mipmap levels available + for a 2D texture. +* ``MAX_TEXTURE_3D_LEVELS``: The maximum number of mipmap levels available + for a 3D texture. +* ``MAX_TEXTURE_CUBE_LEVELS``: The maximum number of mipmap levels available + for a cubemap. +* ``TEXTURE_MIRROR_CLAMP``: Whether mirrored texture coordinates with clamp + are supported. +* ``TEXTURE_MIRROR_REPEAT``: Whether mirrored repeating texture coordinates + are supported. +* ``MAX_VERTEX_TEXTURE_UNITS``: The maximum number of samplers addressable + inside the vertex shader. If this is 0, then the vertex shader cannot + sample textures. +* ``TGSI_CONT_SUPPORTED``: Whether the TGSI CONT opcode is supported. +* ``BLEND_EQUATION_SEPARATE``: Whether alpha blend equations may be different + from color blend equations, in :ref:`Blend` state. +* ``SM3``: Whether the vertex shader and fragment shader support equivalent + opcodes to the Shader Model 3 specification. XXX oh god this is horrible +* ``MAX_PREDICATE_REGISTERS``: XXX +* ``MAX_COMBINED_SAMPLERS``: The total number of samplers accessible from + the vertex and fragment shader, inclusive. +* ``MAX_CONST_BUFFERS``: Maximum number of constant buffers that can be bound + to any shader stage using ``set_constant_buffer``. If 0 or 1, the pipe will + only permit binding one constant buffer per shader, and the shaders will + not permit two-dimensional access to constants. + +If a value greater than 0 is returned, the driver can have multiple +constant buffers bound to shader stages. The CONST register file can +be accessed with two-dimensional indices, like in the example below. + +DCL CONST[0][0..7] # declare first 8 vectors of constbuf 0 +DCL CONST[3][0] # declare first vector of constbuf 3 +MOV OUT[0], CONST[0][3] # copy vector 3 of constbuf 0 + +For backwards compatibility, one-dimensional access to CONST register +file is still supported. In that case, the constbuf index is assumed +to be 0. + +* ``MAX_CONST_BUFFER_SIZE``: Maximum byte size of a single constant buffer. +* ``INDEP_BLEND_ENABLE``: Whether per-rendertarget blend enabling and channel + masks are supported. If 0, then the first rendertarget's blend mask is + replicated across all MRTs. +* ``INDEP_BLEND_FUNC``: Whether per-rendertarget blend functions are + available. If 0, then the first rendertarget's blend functions affect all + MRTs. +* ``PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT``: Whether the TGSI property + FS_COORD_ORIGIN with value UPPER_LEFT is supported. +* ``PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT``: Whether the TGSI property + FS_COORD_ORIGIN with value LOWER_LEFT is supported. +* ``PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER``: Whether the TGSI + property FS_COORD_PIXEL_CENTER with value HALF_INTEGER is supported. +* ``PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER``: Whether the TGSI + property FS_COORD_PIXEL_CENTER with value INTEGER is supported. + +The floating-point capabilities: + +* ``MAX_LINE_WIDTH``: The maximum width of a regular line. +* ``MAX_LINE_WIDTH_AA``: The maximum width of a smoothed line. +* ``MAX_POINT_WIDTH``: The maximum width and height of a point. +* ``MAX_POINT_WIDTH_AA``: The maximum width and height of a smoothed point. +* ``MAX_TEXTURE_ANISOTROPY``: The maximum level of anisotropy that can be + applied to anisotropically filtered textures. +* ``MAX_TEXTURE_LOD_BIAS``: The maximum :term:`LOD` bias that may be applied + to filtered textures. +* ``GUARD_BAND_LEFT``, ``GUARD_BAND_TOP``, ``GUARD_BAND_RIGHT``, + ``GUARD_BAND_BOTTOM``: XXX + +Fragment shader limits: + +* ``PIPE_CAP_MAX_FS_INSTRUCTIONS``: The maximum number of instructions. +* ``PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS``: The maximum number of arithmetic instructions. +* ``PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS``: The maximum number of texture instructions. +* ``PIPE_CAP_MAX_FS_TEX_INDIRECTIONS``: The maximum number of texture indirections. +* ``PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH``: The maximum nested control flow depth. +* ``PIPE_CAP_MAX_FS_INPUTS``: The maximum number of input registers. +* ``PIPE_CAP_MAX_FS_CONSTS``: The maximum number of constants. +* ``PIPE_CAP_MAX_FS_TEMPS``: The maximum number of temporary registers. +* ``PIPE_CAP_MAX_FS_ADDRS``: The maximum number of address registers. +* ``PIPE_CAP_MAX_FS_PREDS``: The maximum number of predicate registers. + +Vertex shader limits: + +* ``PIPE_CAP_MAX_VS_*``: Identical to ``PIPE_CAP_MAX_FS_*``. + + +.. _pipe_bind: + +PIPE_BIND_* +^^^^^^^^^^^ + +These flags indicate how a resource will be used and are specified at resource +creation time. Resources may be used in different roles +during their lifecycle. Bind flags are cumulative and may be combined to create +a resource which can be used for multiple things. +Depending on the pipe driver's memory management and these bind flags, +resources might be created and handled quite differently. + +* ``PIPE_BIND_RENDER_TARGET``: A color buffer or pixel buffer which will be + rendered to. Any surface/resource attached to pipe_framebuffer_state::cbufs + must have this flag set. +* ``PIPE_BIND_DEPTH_STENCIL``: A depth (Z) buffer and/or stencil buffer. Any + depth/stencil surface/resource attached to pipe_framebuffer_state::zsbuf must + have this flag set. +* ``PIPE_BIND_DISPLAY_TARGET``: A surface that can be presented to screen. Arguments to + pipe_screen::flush_front_buffer must have this flag set. +* ``PIPE_BIND_SAMPLER_VIEW``: A texture that may be sampled from in a fragment + or vertex shader. +* ``PIPE_BIND_VERTEX_BUFFER``: A vertex buffer. +* ``PIPE_BIND_INDEX_BUFFER``: An vertex index/element buffer. +* ``PIPE_BIND_CONSTANT_BUFFER``: A buffer of shader constants. +* ``PIPE_BIND_TRANSFER_WRITE``: A transfer object which will be written to. +* ``PIPE_BIND_TRANSFER_READ``: A transfer object which will be read from. +* ``PIPE_BIND_CUSTOM``: +* ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer. +* ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another + process. + +.. _pipe_usage: + +PIPE_USAGE_* +^^^^^^^^^^^^ + +The PIPE_USAGE enums are hints about the expected usage pattern of a resource. + +* ``PIPE_USAGE_DEFAULT``: Expect many uploads to the resource, intermixed with draws. +* ``PIPE_USAGE_DYNAMIC``: Expect many uploads to the resource, intermixed with draws. +* ``PIPE_USAGE_STATIC``: Same as immutable (?) +* ``PIPE_USAGE_IMMUTABLE``: Resource will not be changed after first upload. +* ``PIPE_USAGE_STREAM``: Upload will be followed by draw, followed by upload, ... + + + +PIPE_TEXTURE_GEOM +^^^^^^^^^^^^^^^^^ + +These flags are used when querying whether a particular pipe_format is +supported by the driver (with the `is_format_supported` function). +Some formats may only be supported for certain kinds of textures. +For example, a compressed format might only be used for POT textures. + +* ``PIPE_TEXTURE_GEOM_NON_SQUARE``: The texture may not be square +* ``PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO``: The texture dimensions may not be + powers of two. + + +Methods +------- + +XXX to-do + +get_name +^^^^^^^^ + +Returns an identifying name for the screen. + +get_vendor +^^^^^^^^^^ + +Returns the screen vendor. + +.. _get_param: + +get_param +^^^^^^^^^ + +Get an integer/boolean screen parameter. + +**param** is one of the :ref:`PIPE_CAP` names. + +.. _get_paramf: + +get_paramf +^^^^^^^^^^ + +Get a floating-point screen parameter. + +**param** is one of the :ref:`PIPE_CAP` names. + +context_create +^^^^^^^^^^^^^^ + +Create a pipe_context. + +**priv** is private data of the caller, which may be put to various +unspecified uses, typically to do with implementing swapbuffers +and/or front-buffer rendering. + +is_format_supported +^^^^^^^^^^^^^^^^^^^ + +Determine if a resource in the given format can be used in a specific manner. + +**format** the resource format + +**target** one of the PIPE_TEXTURE_x flags + +**sample_count** the number of samples. 0 and 1 mean no multisampling, +the maximum allowed legal value is 32. + +**bindings** is a bitmask of :ref:`PIPE_BIND` flags. + +**geom_flags** is a bitmask of PIPE_TEXTURE_GEOM_x flags. + +Returns TRUE if all usages can be satisfied. + +.. _resource_create: + +resource_create +^^^^^^^^^^^^^^^ + +Create a new resource from a template. +The following fields of the pipe_resource must be specified in the template: + +target + +format + +width0 + +height0 + +depth0 + +last_level + +nr_samples + +usage + +bind + +flags + + + +resource_destroy +^^^^^^^^^^^^^^^^ + +Destroy a resource. A resource is destroyed if it has no more references. + diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst new file mode 100644 index 0000000000..ecab7cb809 --- /dev/null +++ b/src/gallium/docs/source/tgsi.rst @@ -0,0 +1,1474 @@ +TGSI +==== + +TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language +for describing shaders. Since Gallium is inherently shaderful, shaders are +an important part of the API. TGSI is the only intermediate representation +used by all drivers. + +Basics +------ + +All TGSI instructions, known as *opcodes*, operate on arbitrary-precision +floating-point four-component vectors. An opcode may have up to one +destination register, known as *dst*, and between zero and three source +registers, called *src0* through *src2*, or simply *src* if there is only +one. + +Some instructions, like :opcode:`I2F`, permit re-interpretation of vector +components as integers. Other instructions permit using registers as +two-component vectors with double precision; see :ref:`Double Opcodes`. + +When an instruction has a scalar result, the result is usually copied into +each of the components of *dst*. When this happens, the result is said to be +*replicated* to *dst*. :opcode:`RCP` is one such instruction. + +Instruction Set +--------------- + +Core ISA +^^^^^^^^^^^^^^^^^^^^^^^^^ + +These opcodes are guaranteed to be available regardless of the driver being +used. + +.. opcode:: ARL - Address Register Load + +.. math:: + + dst.x = \lfloor src.x\rfloor + + dst.y = \lfloor src.y\rfloor + + dst.z = \lfloor src.z\rfloor + + dst.w = \lfloor src.w\rfloor + + +.. opcode:: MOV - Move + +.. math:: + + dst.x = src.x + + dst.y = src.y + + dst.z = src.z + + dst.w = src.w + + +.. opcode:: LIT - Light Coefficients + +.. math:: + + dst.x = 1 + + dst.y = max(src.x, 0) + + dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 + + dst.w = 1 + + +.. opcode:: RCP - Reciprocal + +This instruction replicates its result. + +.. math:: + + dst = \frac{1}{src.x} + + +.. opcode:: RSQ - Reciprocal Square Root + +This instruction replicates its result. + +.. math:: + + dst = \frac{1}{\sqrt{|src.x|}} + + +.. opcode:: EXP - Approximate Exponential Base 2 + +.. math:: + + dst.x = 2^{\lfloor src.x\rfloor} + + dst.y = src.x - \lfloor src.x\rfloor + + dst.z = 2^{src.x} + + dst.w = 1 + + +.. opcode:: LOG - Approximate Logarithm Base 2 + +.. math:: + + dst.x = \lfloor\log_2{|src.x|}\rfloor + + dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} + + dst.z = \log_2{|src.x|} + + dst.w = 1 + + +.. opcode:: MUL - Multiply + +.. math:: + + dst.x = src0.x \times src1.x + + dst.y = src0.y \times src1.y + + dst.z = src0.z \times src1.z + + dst.w = src0.w \times src1.w + + +.. opcode:: ADD - Add + +.. math:: + + dst.x = src0.x + src1.x + + dst.y = src0.y + src1.y + + dst.z = src0.z + src1.z + + dst.w = src0.w + src1.w + + +.. opcode:: DP3 - 3-component Dot Product + +This instruction replicates its result. + +.. math:: + + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + +.. opcode:: DP4 - 4-component Dot Product + +This instruction replicates its result. + +.. math:: + + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + +.. opcode:: DST - Distance Vector + +.. math:: + + dst.x = 1 + + dst.y = src0.y \times src1.y + + dst.z = src0.z + + dst.w = src1.w + + +.. opcode:: MIN - Minimum + +.. math:: + + dst.x = min(src0.x, src1.x) + + dst.y = min(src0.y, src1.y) + + dst.z = min(src0.z, src1.z) + + dst.w = min(src0.w, src1.w) + + +.. opcode:: MAX - Maximum + +.. math:: + + dst.x = max(src0.x, src1.x) + + dst.y = max(src0.y, src1.y) + + dst.z = max(src0.z, src1.z) + + dst.w = max(src0.w, src1.w) + + +.. opcode:: SLT - Set On Less Than + +.. math:: + + dst.x = (src0.x < src1.x) ? 1 : 0 + + dst.y = (src0.y < src1.y) ? 1 : 0 + + dst.z = (src0.z < src1.z) ? 1 : 0 + + dst.w = (src0.w < src1.w) ? 1 : 0 + + +.. opcode:: SGE - Set On Greater Equal Than + +.. math:: + + dst.x = (src0.x >= src1.x) ? 1 : 0 + + dst.y = (src0.y >= src1.y) ? 1 : 0 + + dst.z = (src0.z >= src1.z) ? 1 : 0 + + dst.w = (src0.w >= src1.w) ? 1 : 0 + + +.. opcode:: MAD - Multiply And Add + +.. math:: + + dst.x = src0.x \times src1.x + src2.x + + dst.y = src0.y \times src1.y + src2.y + + dst.z = src0.z \times src1.z + src2.z + + dst.w = src0.w \times src1.w + src2.w + + +.. opcode:: SUB - Subtract + +.. math:: + + dst.x = src0.x - src1.x + + dst.y = src0.y - src1.y + + dst.z = src0.z - src1.z + + dst.w = src0.w - src1.w + + +.. opcode:: LRP - Linear Interpolate + +.. math:: + + dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x + + dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y + + dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z + + dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w + + +.. opcode:: CND - Condition + +.. math:: + + dst.x = (src2.x > 0.5) ? src0.x : src1.x + + dst.y = (src2.y > 0.5) ? src0.y : src1.y + + dst.z = (src2.z > 0.5) ? src0.z : src1.z + + dst.w = (src2.w > 0.5) ? src0.w : src1.w + + +.. opcode:: DP2A - 2-component Dot Product And Add + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x + + +.. opcode:: FRC - Fraction + +.. math:: + + dst.x = src.x - \lfloor src.x\rfloor + + dst.y = src.y - \lfloor src.y\rfloor + + dst.z = src.z - \lfloor src.z\rfloor + + dst.w = src.w - \lfloor src.w\rfloor + + +.. opcode:: CLAMP - Clamp + +.. math:: + + dst.x = clamp(src0.x, src1.x, src2.x) + + dst.y = clamp(src0.y, src1.y, src2.y) + + dst.z = clamp(src0.z, src1.z, src2.z) + + dst.w = clamp(src0.w, src1.w, src2.w) + + +.. opcode:: FLR - Floor + +This is identical to :opcode:`ARL`. + +.. math:: + + dst.x = \lfloor src.x\rfloor + + dst.y = \lfloor src.y\rfloor + + dst.z = \lfloor src.z\rfloor + + dst.w = \lfloor src.w\rfloor + + +.. opcode:: ROUND - Round + +.. math:: + + dst.x = round(src.x) + + dst.y = round(src.y) + + dst.z = round(src.z) + + dst.w = round(src.w) + + +.. opcode:: EX2 - Exponential Base 2 + +This instruction replicates its result. + +.. math:: + + dst = 2^{src.x} + + +.. opcode:: LG2 - Logarithm Base 2 + +This instruction replicates its result. + +.. math:: + + dst = \log_2{src.x} + + +.. opcode:: POW - Power + +This instruction replicates its result. + +.. math:: + + dst = src0.x^{src1.x} + +.. opcode:: XPD - Cross Product + +.. math:: + + dst.x = src0.y \times src1.z - src1.y \times src0.z + + dst.y = src0.z \times src1.x - src1.z \times src0.x + + dst.z = src0.x \times src1.y - src1.x \times src0.y + + dst.w = 1 + + +.. opcode:: ABS - Absolute + +.. math:: + + dst.x = |src.x| + + dst.y = |src.y| + + dst.z = |src.z| + + dst.w = |src.w| + + +.. opcode:: RCC - Reciprocal Clamped + +This instruction replicates its result. + +XXX cleanup on aisle three + +.. math:: + + dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + +.. opcode:: DPH - Homogeneous Dot Product + +This instruction replicates its result. + +.. math:: + + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + +.. opcode:: COS - Cosine + +This instruction replicates its result. + +.. math:: + + dst = \cos{src.x} + + +.. opcode:: DDX - Derivative Relative To X + +.. math:: + + dst.x = partialx(src.x) + + dst.y = partialx(src.y) + + dst.z = partialx(src.z) + + dst.w = partialx(src.w) + + +.. opcode:: DDY - Derivative Relative To Y + +.. math:: + + dst.x = partialy(src.x) + + dst.y = partialy(src.y) + + dst.z = partialy(src.z) + + dst.w = partialy(src.w) + + +.. opcode:: KILP - Predicated Discard + + discard + + +.. opcode:: PK2H - Pack Two 16-bit Floats + + TBD + + +.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars + + TBD + + +.. opcode:: PK4B - Pack Four Signed 8-bit Scalars + + TBD + + +.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars + + TBD + + +.. opcode:: RFL - Reflection Vector + +.. math:: + + dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x + + dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y + + dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z + + dst.w = 1 + +.. note:: + + Considered for removal. + + +.. opcode:: SEQ - Set On Equal + +.. math:: + + dst.x = (src0.x == src1.x) ? 1 : 0 + + dst.y = (src0.y == src1.y) ? 1 : 0 + + dst.z = (src0.z == src1.z) ? 1 : 0 + + dst.w = (src0.w == src1.w) ? 1 : 0 + + +.. opcode:: SFL - Set On False + +This instruction replicates its result. + +.. math:: + + dst = 0 + +.. note:: + + Considered for removal. + + +.. opcode:: SGT - Set On Greater Than + +.. math:: + + dst.x = (src0.x > src1.x) ? 1 : 0 + + dst.y = (src0.y > src1.y) ? 1 : 0 + + dst.z = (src0.z > src1.z) ? 1 : 0 + + dst.w = (src0.w > src1.w) ? 1 : 0 + + +.. opcode:: SIN - Sine + +This instruction replicates its result. + +.. math:: + + dst = \sin{src.x} + + +.. opcode:: SLE - Set On Less Equal Than + +.. math:: + + dst.x = (src0.x <= src1.x) ? 1 : 0 + + dst.y = (src0.y <= src1.y) ? 1 : 0 + + dst.z = (src0.z <= src1.z) ? 1 : 0 + + dst.w = (src0.w <= src1.w) ? 1 : 0 + + +.. opcode:: SNE - Set On Not Equal + +.. math:: + + dst.x = (src0.x != src1.x) ? 1 : 0 + + dst.y = (src0.y != src1.y) ? 1 : 0 + + dst.z = (src0.z != src1.z) ? 1 : 0 + + dst.w = (src0.w != src1.w) ? 1 : 0 + + +.. opcode:: STR - Set On True + +This instruction replicates its result. + +.. math:: + + dst = 1 + + +.. opcode:: TEX - Texture Lookup + + TBD + + +.. opcode:: TXD - Texture Lookup with Derivatives + + TBD + + +.. opcode:: TXP - Projective Texture Lookup + + TBD + + +.. opcode:: UP2H - Unpack Two 16-Bit Floats + + TBD + +.. note:: + + Considered for removal. + +.. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars + + TBD + +.. note:: + + Considered for removal. + +.. opcode:: UP4B - Unpack Four Signed 8-Bit Values + + TBD + +.. note:: + + Considered for removal. + +.. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars + + TBD + +.. note:: + + Considered for removal. + +.. opcode:: X2D - 2D Coordinate Transformation + +.. math:: + + dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y + + dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w + + dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y + + dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w + +.. note:: + + Considered for removal. + + +.. opcode:: ARA - Address Register Add + + TBD + +.. note:: + + Considered for removal. + +.. opcode:: ARR - Address Register Load With Round + +.. math:: + + dst.x = round(src.x) + + dst.y = round(src.y) + + dst.z = round(src.z) + + dst.w = round(src.w) + + +.. opcode:: BRA - Branch + + pc = target + +.. note:: + + Considered for removal. + +.. opcode:: CAL - Subroutine Call + + push(pc) + pc = target + + +.. opcode:: RET - Subroutine Call Return + + pc = pop() + + Potential restrictions: + * Only occurs at end of function. + +.. opcode:: SSG - Set Sign + +.. math:: + + dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 + + dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 + + dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 + + dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 + + +.. opcode:: CMP - Compare + +.. math:: + + dst.x = (src0.x < 0) ? src1.x : src2.x + + dst.y = (src0.y < 0) ? src1.y : src2.y + + dst.z = (src0.z < 0) ? src1.z : src2.z + + dst.w = (src0.w < 0) ? src1.w : src2.w + + +.. opcode:: KIL - Conditional Discard + +.. math:: + + if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0) + discard + endif + + +.. opcode:: SCS - Sine Cosine + +.. math:: + + dst.x = \cos{src.x} + + dst.y = \sin{src.x} + + dst.z = 0 + + dst.y = 1 + + +.. opcode:: TXB - Texture Lookup With Bias + + TBD + + +.. opcode:: NRM - 3-component Vector Normalise + +.. math:: + + dst.x = src.x / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.y = src.y / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.z = src.z / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.w = 1 + + +.. opcode:: DIV - Divide + +.. math:: + + dst.x = \frac{src0.x}{src1.x} + + dst.y = \frac{src0.y}{src1.y} + + dst.z = \frac{src0.z}{src1.z} + + dst.w = \frac{src0.w}{src1.w} + + +.. opcode:: DP2 - 2-component Dot Product + +This instruction replicates its result. + +.. math:: + + dst = src0.x \times src1.x + src0.y \times src1.y + + +.. opcode:: TXL - Texture Lookup With LOD + + TBD + + +.. opcode:: BRK - Break + + TBD + + +.. opcode:: IF - If + + TBD + + +.. opcode:: ELSE - Else + + TBD + + +.. opcode:: ENDIF - End If + + TBD + + +.. opcode:: PUSHA - Push Address Register On Stack + + push(src.x) + push(src.y) + push(src.z) + push(src.w) + +.. note:: + + Considered for cleanup. + +.. note:: + + Considered for removal. + +.. opcode:: POPA - Pop Address Register From Stack + + dst.w = pop() + dst.z = pop() + dst.y = pop() + dst.x = pop() + +.. note:: + + Considered for cleanup. + +.. note:: + + Considered for removal. + + +Compute ISA +^^^^^^^^^^^^^^^^^^^^^^^^ + +These opcodes are primarily provided for special-use computational shaders. +Support for these opcodes indicated by a special pipe capability bit (TBD). + +XXX so let's discuss it, yeah? + +.. opcode:: CEIL - Ceiling + +.. math:: + + dst.x = \lceil src.x\rceil + + dst.y = \lceil src.y\rceil + + dst.z = \lceil src.z\rceil + + dst.w = \lceil src.w\rceil + + +.. opcode:: I2F - Integer To Float + +.. math:: + + dst.x = (float) src.x + + dst.y = (float) src.y + + dst.z = (float) src.z + + dst.w = (float) src.w + + +.. opcode:: NOT - Bitwise Not + +.. math:: + + dst.x = ~src.x + + dst.y = ~src.y + + dst.z = ~src.z + + dst.w = ~src.w + + +.. opcode:: TRUNC - Truncate + +.. math:: + + dst.x = trunc(src.x) + + dst.y = trunc(src.y) + + dst.z = trunc(src.z) + + dst.w = trunc(src.w) + + +.. opcode:: SHL - Shift Left + +.. math:: + + dst.x = src0.x << src1.x + + dst.y = src0.y << src1.x + + dst.z = src0.z << src1.x + + dst.w = src0.w << src1.x + + +.. opcode:: SHR - Shift Right + +.. math:: + + dst.x = src0.x >> src1.x + + dst.y = src0.y >> src1.x + + dst.z = src0.z >> src1.x + + dst.w = src0.w >> src1.x + + +.. opcode:: AND - Bitwise And + +.. math:: + + dst.x = src0.x & src1.x + + dst.y = src0.y & src1.y + + dst.z = src0.z & src1.z + + dst.w = src0.w & src1.w + + +.. opcode:: OR - Bitwise Or + +.. math:: + + dst.x = src0.x | src1.x + + dst.y = src0.y | src1.y + + dst.z = src0.z | src1.z + + dst.w = src0.w | src1.w + + +.. opcode:: MOD - Modulus + +.. math:: + + dst.x = src0.x \bmod src1.x + + dst.y = src0.y \bmod src1.y + + dst.z = src0.z \bmod src1.z + + dst.w = src0.w \bmod src1.w + + +.. opcode:: XOR - Bitwise Xor + +.. math:: + + dst.x = src0.x \oplus src1.x + + dst.y = src0.y \oplus src1.y + + dst.z = src0.z \oplus src1.z + + dst.w = src0.w \oplus src1.w + + +.. opcode:: SAD - Sum Of Absolute Differences + +.. math:: + + dst.x = |src0.x - src1.x| + src2.x + + dst.y = |src0.y - src1.y| + src2.y + + dst.z = |src0.z - src1.z| + src2.z + + dst.w = |src0.w - src1.w| + src2.w + + +.. opcode:: TXF - Texel Fetch + + TBD + + +.. opcode:: TXQ - Texture Size Query + + TBD + + +.. opcode:: CONT - Continue + + TBD + +.. note:: + + Support for CONT is determined by a special capability bit, + ``TGSI_CONT_SUPPORTED``. See :ref:`Screen` for more information. + + +Geometry ISA +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These opcodes are only supported in geometry shaders; they have no meaning +in any other type of shader. + +.. opcode:: EMIT - Emit + + TBD + + +.. opcode:: ENDPRIM - End Primitive + + TBD + + +GLSL ISA +^^^^^^^^^^ + +These opcodes are part of :term:`GLSL`'s opcode set. Support for these +opcodes is determined by a special capability bit, ``GLSL``. + +.. opcode:: BGNLOOP - Begin a Loop + + TBD + + +.. opcode:: BGNSUB - Begin Subroutine + + TBD + + +.. opcode:: ENDLOOP - End a Loop + + TBD + + +.. opcode:: ENDSUB - End Subroutine + + TBD + + +.. opcode:: NOP - No Operation + + Do nothing. + + +.. opcode:: NRM4 - 4-component Vector Normalise + +This instruction replicates its result. + +.. math:: + + dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + +ps_2_x +^^^^^^^^^^^^ + +XXX wait what + +.. opcode:: CALLNZ - Subroutine Call If Not Zero + + TBD + + +.. opcode:: IFC - If + + TBD + + +.. opcode:: BREAKC - Break Conditional + + TBD + +.. _doubleopcodes: + +Double ISA +^^^^^^^^^^^^^^^ + +The double-precision opcodes reinterpret four-component vectors into +two-component vectors with doubled precision in each component. + +Support for these opcodes is XXX undecided. :T + +.. opcode:: DADD - Add + +.. math:: + + dst.xy = src0.xy + src1.xy + + dst.zw = src0.zw + src1.zw + + +.. opcode:: DDIV - Divide + +.. math:: + + dst.xy = src0.xy / src1.xy + + dst.zw = src0.zw / src1.zw + +.. opcode:: DSEQ - Set on Equal + +.. math:: + + dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F + + dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F + +.. opcode:: DSLT - Set on Less than + +.. math:: + + dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F + + dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F + +.. opcode:: DFRAC - Fraction + +.. math:: + + dst.xy = src.xy - \lfloor src.xy\rfloor + + dst.zw = src.zw - \lfloor src.zw\rfloor + + +.. opcode:: DFRACEXP - Convert Number to Fractional and Integral Components + +Like the ``frexp()`` routine in many math libraries, this opcode stores the +exponent of its source to ``dst0``, and the significand to ``dst1``, such that +:math:`dst1 \times 2^{dst0} = src` . + +.. math:: + + dst0.xy = exp(src.xy) + + dst1.xy = frac(src.xy) + + dst0.zw = exp(src.zw) + + dst1.zw = frac(src.zw) + +.. opcode:: DLDEXP - Multiply Number by Integral Power of 2 + +This opcode is the inverse of :opcode:`DFRACEXP`. + +.. math:: + + dst.xy = src0.xy \times 2^{src1.xy} + + dst.zw = src0.zw \times 2^{src1.zw} + +.. opcode:: DMIN - Minimum + +.. math:: + + dst.xy = min(src0.xy, src1.xy) + + dst.zw = min(src0.zw, src1.zw) + +.. opcode:: DMAX - Maximum + +.. math:: + + dst.xy = max(src0.xy, src1.xy) + + dst.zw = max(src0.zw, src1.zw) + +.. opcode:: DMUL - Multiply + +.. math:: + + dst.xy = src0.xy \times src1.xy + + dst.zw = src0.zw \times src1.zw + + +.. opcode:: DMAD - Multiply And Add + +.. math:: + + dst.xy = src0.xy \times src1.xy + src2.xy + + dst.zw = src0.zw \times src1.zw + src2.zw + + +.. opcode:: DRCP - Reciprocal + +.. math:: + + dst.xy = \frac{1}{src.xy} + + dst.zw = \frac{1}{src.zw} + +.. opcode:: DSQRT - Square Root + +.. math:: + + dst.xy = \sqrt{src.xy} + + dst.zw = \sqrt{src.zw} + + +Explanation of symbols used +------------------------------ + + +Functions +^^^^^^^^^^^^^^ + + + :math:`|x|` Absolute value of `x`. + + :math:`\lceil x \rceil` Ceiling of `x`. + + clamp(x,y,z) Clamp x between y and z. + (x < y) ? y : (x > z) ? z : x + + :math:`\lfloor x\rfloor` Floor of `x`. + + :math:`\log_2{x}` Logarithm of `x`, base 2. + + max(x,y) Maximum of x and y. + (x > y) ? x : y + + min(x,y) Minimum of x and y. + (x < y) ? x : y + + partialx(x) Derivative of x relative to fragment's X. + + partialy(x) Derivative of x relative to fragment's Y. + + pop() Pop from stack. + + :math:`x^y` `x` to the power `y`. + + push(x) Push x on stack. + + round(x) Round x. + + trunc(x) Truncate x, i.e. drop the fraction bits. + + +Keywords +^^^^^^^^^^^^^ + + + discard Discard fragment. + + pc Program counter. + + target Label of target instruction. + + +Other tokens +--------------- + + +Declaration +^^^^^^^^^^^ + + +Declares a register that is will be referenced as an operand in Instruction +tokens. + +File field contains register file that is being declared and is one +of TGSI_FILE. + +UsageMask field specifies which of the register components can be accessed +and is one of TGSI_WRITEMASK. + +Interpolate field is only valid for fragment shader INPUT register files. +It specifes the way input is being interpolated by the rasteriser and is one +of TGSI_INTERPOLATE. + +If Dimension flag is set to 1, a Declaration Dimension token follows. + +If Semantic flag is set to 1, a Declaration Semantic token follows. + +CylindricalWrap bitfield is only valid for fragment shader INPUT register +files. It specifies which register components should be subject to cylindrical +wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X +is set to 1, the X component should be interpolated according to cylindrical +wrapping rules. + + +Declaration Semantic +^^^^^^^^^^^^^^^^^^^^^^^^ + + + Follows Declaration token if Semantic bit is set. + + Since its purpose is to link a shader with other stages of the pipeline, + it is valid to follow only those Declaration tokens that declare a register + either in INPUT or OUTPUT file. + + SemanticName field contains the semantic name of the register being declared. + There is no default value. + + SemanticIndex is an optional subscript that can be used to distinguish + different register declarations with the same semantic name. The default value + is 0. + + The meanings of the individual semantic names are explained in the following + sections. + +TGSI_SEMANTIC_POSITION +"""""""""""""""""""""" + +Position, sometimes known as HPOS or WPOS for historical reasons, is the +location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z`` +are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used +for the perspective divide, if enabled. + +As a vertex shader output, position should be scaled to the viewport. When +used in fragment shaders, position will be in window coordinates. The convention +used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties. + +XXX additionally, is there a way to configure the perspective divide? it's +accelerated on most chipsets AFAIK... + +Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can +be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``. + +XXX usually? can we solidify that? + +TGSI_SEMANTIC_COLOR +""""""""""""""""""" + +Colors are used to, well, color the primitives. Colors are always in +``(r, g, b, a)`` format. + +If alpha is not specified, it defaults to 1. + +TGSI_SEMANTIC_BCOLOR +"""""""""""""""""""" + +Back-facing colors are only used for back-facing polygons, and are only valid +in vertex shader outputs. After rasterization, all polygons are front-facing +and COLOR and BCOLOR end up occupying the same slots in the fragment, so +all BCOLORs effectively become regular COLORs in the fragment shader. + +TGSI_SEMANTIC_FOG +""""""""""""""""" + +The fog coordinate historically has been used to replace the depth coordinate +for generation of fog in dedicated fog blocks. Gallium, however, does not use +dedicated fog acceleration, placing it entirely in the fragment shader +instead. + +The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first +component matters when writing from the vertex shader; the driver will ensure +that the coordinate is in this format when used as a fragment shader input. + +TGSI_SEMANTIC_PSIZE +""""""""""""""""""" + +PSIZE, or point size, is used to specify point sizes per-vertex. It should +be in ``(s, 0, 0, 1)`` format, where ``s`` is the (possibly clamped) point size. +Only the first component matters when writing from the vertex shader. + +When using this semantic, be sure to set the appropriate state in the +:ref:`rasterizer` first. + +TGSI_SEMANTIC_GENERIC +""""""""""""""""""""" + +Generic semantics are nearly always used for texture coordinate attributes, +in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds +of lookups, and ``q`` is the level-of-detail bias for biased sampling. + +These attributes are called "generic" because they may be used for anything +else, including parameters, texture generation information, or anything that +can be stored inside a four-component vector. + +TGSI_SEMANTIC_NORMAL +"""""""""""""""""""" + +Vertex normal; could be used to implement per-pixel lighting for legacy APIs +that allow mixing fixed-function and programmable stages. + +TGSI_SEMANTIC_FACE +"""""""""""""""""" + +FACE is the facing bit, to store the facing information for the fragment +shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive +when the fragment is front-facing, and negative when the component is +back-facing. + +TGSI_SEMANTIC_EDGEFLAG +"""""""""""""""""""""" + +XXX no clue + + +Properties +^^^^^^^^^^^^^^^^^^^^^^^^ + + + Properties are general directives that apply to the whole TGSI program. + +FS_COORD_ORIGIN +""""""""""""""" + +Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin. +The default value is UPPER_LEFT. + +If UPPER_LEFT, the position will be (0,0) at the upper left corner and +increase downward and rightward. +If LOWER_LEFT, the position will be (0,0) at the lower left corner and +increase upward and rightward. + +OpenGL defaults to LOWER_LEFT, and is configurable with the +GL_ARB_fragment_coord_conventions extension. + +DirectX 9/10 use UPPER_LEFT. + +FS_COORD_PIXEL_CENTER +""""""""""""""""""""" + +Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention. +The default value is HALF_INTEGER. + +If HALF_INTEGER, the fractionary part of the position will be 0.5 +If INTEGER, the fractionary part of the position will be 0.0 + +Note that this does not affect the set of fragments generated by +rasterization, which is instead controlled by gl_rasterization_rules in the +rasterizer. + +OpenGL defaults to HALF_INTEGER, and is configurable with the +GL_ARB_fragment_coord_conventions extension. + +DirectX 9 uses INTEGER. +DirectX 10 uses HALF_INTEGER. + + + +Texture Sampling and Texture Formats +------------------------------------ + +This table shows how texture image components are returned as (x,y,z,w) tuples +by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and +:opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as +well. + ++--------------------+--------------+--------------------+--------------+ +| Texture Components | Gallium | OpenGL | Direct3D 9 | ++====================+==============+====================+==============+ +| R | (r, 0, 0, 1) | (r, 0, 0, 1) | (r, 1, 1, 1) | ++--------------------+--------------+--------------------+--------------+ +| RG | (r, g, 0, 1) | (r, g, 0, 1) | (r, g, 1, 1) | ++--------------------+--------------+--------------------+--------------+ +| RGB | (r, g, b, 1) | (r, g, b, 1) | (r, g, b, 1) | ++--------------------+--------------+--------------------+--------------+ +| RGBA | (r, g, b, a) | (r, g, b, a) | (r, g, b, a) | ++--------------------+--------------+--------------------+--------------+ +| A | (0, 0, 0, a) | (0, 0, 0, a) | (0, 0, 0, a) | ++--------------------+--------------+--------------------+--------------+ +| L | (l, l, l, 1) | (l, l, l, 1) | (l, l, l, 1) | ++--------------------+--------------+--------------------+--------------+ +| LA | (l, l, l, a) | (l, l, l, a) | (l, l, l, a) | ++--------------------+--------------+--------------------+--------------+ +| I | (i, i, i, i) | (i, i, i, i) | N/A | ++--------------------+--------------+--------------------+--------------+ +| UV | XXX TBD | (0, 0, 0, 1) | (u, v, 1, 1) | +| | | [#envmap-bumpmap]_ | | ++--------------------+--------------+--------------------+--------------+ +| Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) | +| | | [#depth-tex-mode]_ | | ++--------------------+--------------+--------------------+--------------+ + +.. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt +.. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z) + or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE. |