16 files changed, 3048 insertions, 0 deletions
diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py
new file mode 100644
index 0000000000..ccc84405c4
--- /dev/null
+++ b/src/gallium/docs/source/conf.py
@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+#
+# Gallium documentation build configuration file, created by
+# sphinx-quickstart on Sun Dec 20 14:09:05 2009.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.append(os.path.abspath('exts'))
+
+# -- General configuration -----------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.pngmath', 'tgsi']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'Gallium'
+copyright = u'2009, VMWare, X.org, Nouveau'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.4'
+# The full version, including alpha/beta/rc tags.
+release = '0.4'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of documents that shouldn't be included in the build.
+#unused_docs = []
+
+# List of directories, relative to source directory, that shouldn't be searched
+# for source files.
+exclude_trees = []
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# The language for highlighting source code.
+highlight_language = 'c'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  Major themes that come with
+# Sphinx are currently 'default' and 'sphinxdoc'.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_use_modindex = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = ''
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Galliumdoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+  ('index', 'Gallium.tex', u'Gallium Documentation',
+   u'VMWare, X.org, Nouveau', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_use_modindex = True
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
new file mode 100644
index 0000000000..4e35a4c408
--- /dev/null
+++ b/src/gallium/docs/source/context.rst
@@ -0,0 +1,367 @@
+.. _context:
+
+Context
+=======
+
+The context object represents the purest, most directly accessible, abilities
+of the device's 3D rendering pipeline.
+
+Methods
+-------
+
+CSO State
+^^^^^^^^^
+
+All CSO state is created, bound, and destroyed, with triplets of methods that
+all follow a specific naming scheme. For example, ``create_blend_state``,
+``bind_blend_state``, and ``destroy_blend_state``.
+
+CSO objects handled by the context object:
+
+* :ref:`Blend`: ``*_blend_state``
+* :ref:`Sampler`: These are special; they can be bound to either vertex or
+  fragment samplers, and they are bound in groups.
+  ``bind_fragment_sampler_states``, ``bind_vertex_sampler_states``
+* :ref:`Rasterizer`: ``*_rasterizer_state``
+* :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state``
+* :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for
+  fragment shaders, and ``*_vs_state`` is for vertex shaders.
+* :ref:`Vertex Elements`: ``*_vertex_elements_state``
+
+
+Resource Binding State
+^^^^^^^^^^^^^^^^^^^^^^
+
+This state describes how resources in various flavours (textures,
+buffers, surfaces) are bound to the driver.
+
+
+* ``set_constant_buffer`` sets a constant buffer to be used for a given shader
+  type. index is used to indicate which buffer to set (some apis may allow
+  multiple ones to be set, and binding a specific one later, though drivers
+  are mostly restricted to the first one right now).
+
+* ``set_framebuffer_state``
+
+* ``set_vertex_buffers``
+
+
+Non-CSO State
+^^^^^^^^^^^^^
+
+These pieces of state are too small, variable, and/or trivial to have CSO
+objects. They all follow simple, one-method binding calls, e.g.
+``set_blend_color``.
+
+* ``set_stencil_ref`` sets the stencil front and back reference values
+  which are used as comparison values in stencil test.
+* ``set_blend_color``
+* ``set_sample_mask``
+* ``set_clip_state``
+* ``set_polygon_stipple``
+* ``set_scissor_state`` sets the bounds for the scissor test, which culls
+  pixels before blending to render targets. If the :ref:`Rasterizer` does
+  not have the scissor test enabled, then the scissor bounds never need to
+  be set since they will not be used.
+* ``set_viewport_state``
+
+
+Sampler Views
+^^^^^^^^^^^^^
+
+These are the means to bind textures to shader stages. To create one, specify
+its format, swizzle and LOD range in sampler view template.
+
+If texture format is different than template format, it is said the texture
+is being cast to another format. Casting can be done only between compatible
+formats, that is formats that have matching component order and sizes.
+
+Swizzle fields specify the way in which fetched texel components are placed
+in the result register. For example, ``swizzle_r`` specifies what is going to be
+placed in first component of result register.
+
+The ``first_level`` and ``last_level`` fields of sampler view template specify
+the LOD range the texture is going to be constrained to.
+
+* ``set_fragment_sampler_views`` binds an array of sampler views to
+  fragment shader stage. Every binding point acquires a reference
+  to a respective sampler view and releases a reference to the previous
+  sampler view.
+
+* ``set_vertex_sampler_views`` binds an array of sampler views to vertex
+  shader stage. Every binding point acquires a reference to a respective
+  sampler view and releases a reference to the previous sampler view.
+
+* ``create_sampler_view`` creates a new sampler view. ``texture`` is associated
+  with the sampler view which results in sampler view holding a reference
+  to the texture. Format specified in template must be compatible
+  with texture format.
+
+* ``sampler_view_destroy`` destroys a sampler view and releases its reference
+  to associated texture.
+
+
+Clearing
+^^^^^^^^
+
+Clear is one of the most difficult concepts to nail down to a single
+interface (due to both different requirements from APIs and also driver/hw
+specific differences).
+
+``clear`` initializes some or all of the surfaces currently bound to
+the framebuffer to particular RGBA, depth, or stencil values.
+Currently, this does not take into account color or stencil write masks (as
+used by GL), and always clears the whole surfaces (no scissoring as used by
+GL clear or explicit rectangles like d3d9 uses). It can, however, also clear
+only depth or stencil in a combined depth/stencil surface, if the driver
+supports PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE.
+If a surface includes several layers/slices (XXX: not yet...) then all layers
+will be cleared.
+
+``clear_render_target`` clears a single color rendertarget with the specified
+color value. While it is only possible to clear one surface at a time (which can
+include several layers), this surface need not be bound to the framebuffer.
+
+``clear_depth_stencil`` clears a single depth, stencil or depth/stencil surface
+with the specified depth and stencil values (for combined depth/stencil buffers,
+it is also possible to only clear one or the other part). While it is only
+possible to clear one surface at a time (which can include several layers),
+this surface need not be bound to the framebuffer.
+
+
+Drawing
+^^^^^^^
+
+``draw_arrays`` draws a specified primitive.
+
+This command is equivalent to calling ``draw_arrays_instanced``
+with ``startInstance`` set to 0 and ``instanceCount`` set to 1.
+
+``draw_elements`` draws a specified primitive using an optional
+index buffer.
+
+This command is equivalent to calling ``draw_elements_instanced``
+with ``startInstance`` set to 0 and ``instanceCount`` set to 1.
+
+``draw_range_elements``
+
+XXX: this is (probably) a temporary entrypoint, as the range
+information should be available from the vertex_buffer state.
+Using this to quickly evaluate a specialized path in the draw
+module.
+
+``draw_arrays_instanced`` draws multiple instances of the same primitive.
+
+This command is equivalent to calling ``draw_elements_instanced``
+with ``indexBuffer`` set to NULL and ``indexSize`` set to 0.
+
+``draw_elements_instanced`` draws multiple instances of the same primitive
+using an optional index buffer.
+
+For instanceID in the range between ``startInstance``
+and ``startInstance + instanceCount - 1``, inclusive, draw a primitive
+specified by ``mode`` and sequential numbers in the range between ``start``
+and ``start + count - 1``, inclusive.
+
+If ``indexBuffer`` is not NULL, it specifies an index buffer with index
+byte size of ``indexSize``. The sequential numbers are used to lookup
+the index buffer and the resulting indices in turn are used to fetch
+vertex attributes.
+
+If ``indexBuffer`` is NULL, the sequential numbers are used directly
+as indices to fetch vertex attributes.
+
+``indexBias`` is a value which is added to every index read from the index 
+buffer before fetching vertex attributes.
+
+``minIndex`` and ``maxIndex`` describe minimum and maximum index contained in
+the index buffer.
+
+If a given vertex element has ``instance_divisor`` set to 0, it is said
+it contains per-vertex data and effective vertex attribute address needs
+to be recalculated for every index.
+
+  attribAddr = ``stride`` * index + ``src_offset``
+
+If a given vertex element has ``instance_divisor`` set to non-zero,
+it is said it contains per-instance data and effective vertex attribute
+address needs to be recalculated for every ``instance_divisor``-th instance.
+
+  attribAddr = ``stride`` * instanceID / ``instance_divisor`` + ``src_offset``
+
+In the above formulas, ``src_offset`` is taken from the given vertex element
+and ``stride`` is taken from a vertex buffer associated with the given
+vertex element.
+
+The calculated attribAddr is used as an offset into the vertex buffer to
+fetch the attribute data.
+
+The value of ``instanceID`` can be read in a vertex shader through a system
+value register declared with INSTANCEID semantic name.
+
+
+Queries
+^^^^^^^
+
+Queries gather some statistic from the 3D pipeline over one or more
+draws.  Queries may be nested, though no state tracker currently
+exercises this.  
+
+Queries can be created with ``create_query`` and deleted with
+``destroy_query``. To start a query, use ``begin_query``, and when finished,
+use ``end_query`` to end the query.
+
+``get_query_result`` is used to retrieve the results of a query.  If
+the ``wait`` parameter is TRUE, then the ``get_query_result`` call
+will block until the results of the query are ready (and TRUE will be
+returned).  Otherwise, if the ``wait`` parameter is FALSE, the call
+will not block and the return value will be TRUE if the query has
+completed or FALSE otherwise.
+
+The most common type of query is the occlusion query,
+``PIPE_QUERY_OCCLUSION_COUNTER``, which counts the number of fragments which
+are written to the framebuffer without being culled by
+:ref:`Depth, Stencil, & Alpha` testing or shader KILL instructions.
+
+Another type of query, ``PIPE_QUERY_TIME_ELAPSED``, returns the amount of
+time, in nanoseconds, the context takes to perform operations.
+
+Gallium does not guarantee the availability of any query types; one must
+always check the capabilities of the :ref:`Screen` first.
+
+
+Conditional Rendering
+^^^^^^^^^^^^^^^^^^^^^
+
+A drawing command can be skipped depending on the outcome of a query
+(typically an occlusion query).  The ``render_condition`` function specifies
+the query which should be checked prior to rendering anything.
+
+If ``render_condition`` is called with ``query`` = NULL, conditional
+rendering is disabled and drawing takes place normally.
+
+If ``render_condition`` is called with a non-null ``query`` subsequent
+drawing commands will be predicated on the outcome of the query.  If
+the query result is zero subsequent drawing commands will be skipped.
+
+If ``mode`` is PIPE_RENDER_COND_WAIT the driver will wait for the
+query to complete before deciding whether to render.
+
+If ``mode`` is PIPE_RENDER_COND_NO_WAIT and the query has not yet
+completed, the drawing command will be executed normally.  If the query
+has completed, drawing will be predicated on the outcome of the query.
+
+If ``mode`` is PIPE_RENDER_COND_BY_REGION_WAIT or
+PIPE_RENDER_COND_BY_REGION_NO_WAIT rendering will be predicated as above
+for the non-REGION modes but in the case that an occlusion query returns
+a non-zero result, regions which were occluded may be omitted by subsequent
+drawing commands.  This can result in better performance with some GPUs.
+Normally, if the occlusion query returned a non-zero result subsequent
+drawing happens normally so fragments may be generated, shaded and
+processed even where they're known to be obscured.
+
+
+Flushing
+^^^^^^^^
+
+``flush``
+
+
+Resource Busy Queries
+^^^^^^^^^^^^^^^^^^^^^
+
+``is_resource_referenced``
+
+
+
+Blitting
+^^^^^^^^
+
+These methods emulate classic blitter controls.
+
+These methods operate directly on ``pipe_resource`` objects, and stand
+apart from any 3D state in the context.  Blitting functionality may be
+moved to a separate abstraction at some point in the future.
+
+``resource_copy_region`` blits a region of a subresource of a resource to a
+region of another subresource of a resource, provided that both resources have the
+same format. The source and destination may be the same resource, but overlapping
+blits are not permitted.
+
+``resource_resolve`` resolves a multisampled resource into a non-multisampled
+one. Formats and dimensions must match. This function must be present if a driver
+supports multisampling.
+
+The interfaces to these calls are likely to change to make it easier
+for a driver to batch multiple blits with the same source and
+destination.
+
+
+Stream Output
+^^^^^^^^^^^^^
+
+Stream output, also known as transform feedback, allows the results of the
+vertex pipeline (after the geometry shader or vertex shader if no geometry shader
+is present) to be written to a buffer created with a ``PIPE_BIND_STREAM_OUTPUT``
+flag.
+
+First a stream output state needs to be created with the
+``create_stream_output_state`` call. It specifies the details of what's being written,
+to which buffer and with what kind of a writemask.
+
+Then the target buffers need to be set with a call to ``set_stream_output_buffers``,
+which sets the buffers and the offsets from the start of those buffers to where
+the data will be written.
+
+
+Transfers
+^^^^^^^^^
+
+These methods are used to get data to/from a resource.
+
+``get_transfer`` creates a transfer object.
+
+``transfer_destroy`` destroys the transfer object. May cause
+data to be written to the resource at this point.
+
+``transfer_map`` creates a memory mapping for the transfer object.
+The returned map points to the start of the mapped range according to
+the box region, not the beginning of the resource.
+
+``transfer_unmap`` removes the memory mapping for the transfer object.
+Any pointers into the map should be considered invalid and discarded.
+
+``transfer_inline_write`` performs a simplified transfer for simple writes.
+Basically get_transfer, transfer_map, data write, transfer_unmap, and
+transfer_destroy all in one.
+
+.. _transfer_flush_region:
+
+transfer_flush_region
+%%%%%%%%%%%%%%%%%%%%%
+
+If a transfer was created with ``FLUSH_EXPLICIT``, it will not automatically
+be flushed on write or unmap. Flushes must be requested with
+``transfer_flush_region``. Flush ranges are relative to the mapped range, not
+the beginning of the resource.
+
+.. _pipe_transfer:
+
+PIPE_TRANSFER
+^^^^^^^^^^^^^
+
+These flags control the behavior of a transfer object.
+
+* ``READ``: resource contents are read at transfer create time.
+* ``WRITE``: resource contents will be written back at transfer destroy time.
+* ``MAP_DIRECTLY``: a transfer should directly map the resource. May return
+  NULL if not supported.
+* ``DISCARD``: The memory within the mapped region is discarded.
+  Cannot be used with ``READ``.
+* ``DONTBLOCK``: Fail if the resource cannot be mapped immediately.
+* ``UNSYNCHRONIZED``: Do not synchronize pending operations on the resource
+  when mapping. The interaction of any writes to the map and any
+  operations pending on the resource are undefined. Cannot be used with
+  ``READ``.
+* ``FLUSH_EXPLICIT``: Written ranges will be notified later with
+  :ref:`transfer_flush_region`. Cannot be used with ``READ``.
diff --git a/src/gallium/docs/source/cso.rst b/src/gallium/docs/source/cso.rst
new file mode 100644
index 0000000000..dab1ee50f3
--- /dev/null
+++ b/src/gallium/docs/source/cso.rst
@@ -0,0 +1,14 @@
+CSO
+===
+
+CSO, Constant State Objects, are a core part of Gallium's API.
+
+CSO work on the principle of reusable state; they are created by filling
+out a state object with the desired properties, then passing that object
+to a context. The context returns an opaque context-specific handle which
+can be bound at any time for the desired effect.
+
+.. toctree::
+   :glob:
+
+   cso/*
diff --git a/src/gallium/docs/source/cso/blend.rst b/src/gallium/docs/source/cso/blend.rst
new file mode 100644
index 0000000000..c74396284c
--- /dev/null
+++ b/src/gallium/docs/source/cso/blend.rst
@@ -0,0 +1,55 @@
+.. _blend:
+
+Blend
+=====
+
+This state controls blending of the final fragments into the target rendering
+buffers.
+
+Blend Factors
+-------------
+
+The blend factors largely follow the same pattern as their counterparts
+in other modern and legacy drawing APIs.
+
+XXX blurb about dual-source blends
+
+Members
+-------
+
+independent_blend_enable
+   If enabled, blend state is different for each render target, and the state
+   for each render target is set in the respective member of the rt array.
+   If disabled, blend state is the same for all render targets, and only
+   the first member of the rt array contains valid data.
+logicop_enable
+   Enables logic ops. Cannot be enabled at the same time as blending, and
+   is always the same for all render targets.
+logicop_func
+   The logic operation to use if logic ops are enabled. One of PIPE_LOGICOP.
+dither
+   Whether dithering is enabled. Note: Dithering is implementation-dependent.
+rt
+   Contains the per-rendertarget blend state.
+
+Per-rendertarget Members
+------------------------
+
+blend_enable
+   If blending is enabled, perform a blend calculation according to blend
+   functions and source/destination factors. Otherwise, the incoming fragment
+   color gets passed unmodified (but colormask still applies).
+rgb_func
+   The blend function to use for rgb channels. One of PIPE_BLEND.
+rgb_src_factor
+   The blend source factor to use for rgb channels. One of PIPE_BLENDFACTOR.
+rgb_dst_factor
+   The blend destination factor to use for rgb channels. One of PIPE_BLENDFACTOR.
+alpha_func
+   The blend function to use for the alpha channel. One of PIPE_BLEND.
+alpha_src_factor
+   The blend source factor to use for the alpha channel. One of PIPE_BLENDFACTOR.
+alpha_dst_factor
+   The blend destination factor to use for alpha channel. One of PIPE_BLENDFACTOR.
+colormask
+   Bitmask of which channels to write. Combination of PIPE_MASK bits.
diff --git a/src/gallium/docs/source/cso/dsa.rst b/src/gallium/docs/source/cso/dsa.rst
new file mode 100644
index 0000000000..1bbe381f9e
--- /dev/null
+++ b/src/gallium/docs/source/cso/dsa.rst
@@ -0,0 +1,61 @@
+.. _depth, stencil, & alpha:
+
+Depth, Stencil, & Alpha
+=======================
+
+These three states control the depth, stencil, and alpha tests, used to
+discard fragments that have passed through the fragment shader.
+
+Traditionally, these three tests have been clumped together in hardware, so
+they are all stored in one structure.
+
+During actual execution, the order of operations done on fragments is always:
+
+* Alpha
+* Stencil
+* Depth
+
+Depth Members
+-------------
+
+enabled
+    Whether the depth test is enabled.
+writemask
+    Whether the depth buffer receives depth writes.
+func
+    The depth test function. One of PIPE_FUNC.
+
+Stencil Members
+---------------
+
+enabled
+    Whether the stencil test is enabled. For the second stencil, whether the
+    two-sided stencil is enabled. If two-sided stencil is disabled, the other
+    fields for the second array member are not valid.
+func
+    The stencil test function. One of PIPE_FUNC.
+valuemask
+    Stencil test value mask; this is ANDed with the value in the stencil
+    buffer and the reference value before doing the stencil comparison test.
+writemask
+    Stencil test writemask; this controls which bits of the stencil buffer
+    are written.
+fail_op
+    The operation to carry out if the stencil test fails. One of
+    PIPE_STENCIL_OP.
+zfail_op
+    The operation to carry out if the stencil test passes but the depth test
+    fails. One of PIPE_STENCIL_OP.
+zpass_op
+    The operation to carry out if the stencil test and depth test both pass.
+    One of PIPE_STENCIL_OP.
+
+Alpha Members
+-------------
+
+enabled
+    Whether the alpha test is enabled.
+func
+    The alpha test function. One of PIPE_FUNC.
+ref_value
+    Alpha test reference value; used for certain functions.
diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst
new file mode 100644
index 0000000000..ad1612f93e
--- /dev/null
+++ b/src/gallium/docs/source/cso/rasterizer.rst
@@ -0,0 +1,188 @@
+.. _rasterizer:
+
+Rasterizer
+==========
+
+The rasterizer state controls the rendering of points, lines and triangles.
+Attributes include polygon culling state, line width, line stipple,
+multisample state, scissoring and flat/smooth shading.
+
+Shading
+-------
+
+flatshade
+^^^^^^^^^
+
+If set, the provoking vertex of each polygon is used to determine the color
+of the entire polygon.  If not set, fragment colors will be interpolated
+between the vertex colors.
+
+The actual interpolated shading algorithm is obviously
+implementation-dependent, but will usually be Gouraud for most hardware.
+
+.. note::
+
+    This is separate from the fragment shader input attributes
+    CONSTANT, LINEAR and PERSPECTIVE. The flatshade state is needed at
+    clipping time to determine how to set the color of new vertices.
+
+    :ref:`Draw` can implement flat shading by copying the provoking vertex
+    color to all the other vertices in the primitive.
+
+flatshade_first
+^^^^^^^^^^^^^^^
+
+Whether the first vertex should be the provoking vertex, for most primitives.
+If not set, the last vertex is the provoking vertex.
+
+There are several important exceptions to the specification of this rule.
+
+* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first
+  vertex. If the caller wishes to change the provoking vertex, they merely
+  need to rotate the vertices themselves.
+* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no
+  effect; the provoking vertex is always the last vertex.
+* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the
+  second vertex, not the first. This permits each segment of the fan to have
+  a different color.
+
+Polygons
+--------
+
+light_twoside
+^^^^^^^^^^^^^
+
+If set, there are per-vertex back-facing colors.  The hardware
+(perhaps assisted by :ref:`Draw`) should be set up to use this state
+along with the front/back information to set the final vertex colors
+prior to rasterization.
+
+The frontface vertex shader color output is marked with TGSI semantic
+COLOR[0], and backface COLOR[1].
+
+front_ccw
+    Indicates whether the window order of front-facing polygons is
+    counter-clockwise (TRUE) or clockwise (FALSE).
+
+cull_mode
+    Indicates which faces of polygons to cull, either PIPE_FACE_NONE
+    (cull no polygons), PIPE_FACE_FRONT (cull front-facing polygons),
+    PIPE_FACE_BACK (cull back-facing polygons), or
+    PIPE_FACE_FRONT_AND_BACK (cull all polygons).
+
+fill_front
+    Indicates how to fill front-facing polygons, either
+    PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE or
+    PIPE_POLYGON_MODE_POINT.
+fill_back
+    Indicates how to fill back-facing polygons, either
+    PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE or
+    PIPE_POLYGON_MODE_POINT.
+
+poly_stipple_enable
+    Whether polygon stippling is enabled.
+poly_smooth
+    Controls OpenGL-style polygon smoothing/antialiasing
+
+offset_point
+    If set, point-filled polygons will have polygon offset factors applied
+offset_line
+    If set, line-filled polygons will have polygon offset factors applied
+offset_tri
+    If set, filled polygons will have polygon offset factors applied
+
+offset_units
+    Specifies the polygon offset bias
+offset_scale
+    Specifies the polygon offset scale
+
+
+
+Lines
+-----
+
+line_width
+    The width of lines.
+line_smooth
+    Whether lines should be smoothed. Line smoothing is simply anti-aliasing.
+line_stipple_enable
+    Whether line stippling is enabled.
+line_stipple_pattern
+    16-bit bitfield of on/off flags, used to pattern the line stipple.
+line_stipple_factor
+    When drawing a stippled line, each bit in the stipple pattern is
+    repeated N times, where N = line_stipple_factor + 1.
+line_last_pixel
+    Controls whether the last pixel in a line is drawn or not.  OpenGL
+    omits the last pixel to avoid double-drawing pixels at the ends of lines
+    when drawing connected lines.
+
+
+Points
+------
+
+sprite_coord_enable
+^^^^^^^^^^^^^^^^^^^
+
+Specifies if a texture unit has its texture coordinates replaced or not. This
+is a packed bitfield containing the enable for all texcoords -- if all bits
+are zero, point sprites are effectively disabled. If any bit is set, then
+point_smooth and point_quad_rasterization are ignored; point smoothing is
+disabled and points are always rasterized as quads. If enabled, the four
+vertices of the resulting quad will be assigned texture coordinates,
+according to sprite_coord_mode.
+
+sprite_coord_mode
+^^^^^^^^^^^^^^^^^
+
+Specifies how the value for each shader output should be computed when drawing
+point sprites. For PIPE_SPRITE_COORD_LOWER_LEFT, the lower-left vertex will
+have coordinates (0,0,0,1). For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left
+vertex will have coordinates (0,0,0,1).
+This state is used by :ref:`Draw` to generate texcoords.
+
+.. note::
+
+    When geometry shaders are available, a special geometry shader could be
+    used instead of this functionality, to convert incoming points into quads
+    with the proper texture coordinates.
+
+point_quad_rasterization
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Determines if points should be rasterized as quads or points. Certain APIs,
+like Direct3D, always use quad rasterization for points, regardless of
+whether point sprites are enabled or not. If this state is enabled, point
+smoothing and antialiasing are disabled. If it is disabled, point sprite
+coordinates are not generated.
+
+.. note::
+
+   Some renderers always internally translate points into quads; this state
+   still affects those renderers by overriding other rasterization state.
+
+point_smooth
+    Whether points should be smoothed. Point smoothing turns rectangular
+    points into circles or ovals.
+point_size_per_vertex
+    Whether the vertex shader is expected to have a point size output.
+    Undefined behaviour is permitted if there is disagreement between
+    this flag and the actual bound shader.
+point_size
+    The size of points, if not specified per-vertex.
+
+
+
+Other Members
+-------------
+
+scissor
+    Whether the scissor test is enabled.
+
+multisample
+    Whether :term:`MSAA` is enabled.
+
+gl_rasterization_rules
+    Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set,
+    the rasterizer will use (0, 0) for pixel centers.
+
diff --git a/src/gallium/docs/source/cso/sampler.rst b/src/gallium/docs/source/cso/sampler.rst
new file mode 100644
index 0000000000..9bbb784de8
--- /dev/null
+++ b/src/gallium/docs/source/cso/sampler.rst
@@ -0,0 +1,109 @@
+.. _sampler:
+
+Sampler
+=======
+
+Texture units have many options for selecting texels from loaded textures;
+this state controls an individual texture unit's texel-sampling settings.
+
+Texture coordinates are always treated as four-dimensional, and referred to
+with the traditional (S, T, R, Q) notation.
+
+Members
+-------
+
+wrap_s
+    How to wrap the S coordinate. One of PIPE_TEX_WRAP_*.
+wrap_t
+    How to wrap the T coordinate. One of PIPE_TEX_WRAP_*.
+wrap_r
+    How to wrap the R coordinate. One of PIPE_TEX_WRAP_*.
+
+The wrap modes are:
+
+* ``PIPE_TEX_WRAP_REPEAT``: Standard coord repeat/wrap-around mode.
+* ``PIPE_TEX_WRAP_CLAMP_TO_EDGE``: Clamp coord to edge of texture; the border
+  color is never sampled.
+* ``PIPE_TEX_WRAP_CLAMP_TO_BORDER``: Clamp coord to border of texture; the
+  border color is sampled when coords go outside the range [0,1].
+* ``PIPE_TEX_WRAP_CLAMP``: The coord is clamped to the range [0,1] before
+  scaling to the texture size.  This corresponds to the legacy OpenGL GL_CLAMP
+  texture wrap mode.  Historically, this mode hasn't acted consistently across
+  all graphics hardware.  It sometimes acts like CLAMP_TO_EDGE or
+  CLAMP_TO_BORDER.  The behaviour may also vary depending on linear vs.
+  nearest sampling mode.
+* ``PIPE_TEX_WRAP_MIRROR_REPEAT``: If the integer part of the coordinate
+  is odd, the coord becomes (1 - coord).  Then, normal texture REPEAT is
+  applied to the coord.
+* ``PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE``: First, the absolute value of the
+  coordinate is computed.  Then, regular CLAMP_TO_EDGE is applied to the coord.
+* ``PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER``: First, the absolute value of the
+  coordinate is computed.  Then, regular CLAMP_TO_BORDER is applied to the
+  coord.
+* ``PIPE_TEX_WRAP_MIRROR_CLAMP``: First, the absolute value of the coord is
+  computed.  Then, regular CLAMP is applied to the coord.
+
+
+min_img_filter
+    The image filter to use when minifying texels. One of PIPE_TEX_FILTER_*.
+mag_img_filter
+    The image filter to use when magnifying texels. One of PIPE_TEX_FILTER_*.
+
+The texture image filter modes are:
+
+* ``PIPE_TEX_FILTER_NEAREST``: One texel is fetched from the texture image
+  at the texture coordinate.
+* ``PIPE_TEX_FILTER_LINEAR``: Two, four or eight texels (depending on the
+  texture dimensions; 1D/2D/3D) are fetched from the texture image and
+  linearly weighted and blended together.
+
+min_mip_filter
+    The filter to use when minifying mipmapped textures. One of
+    PIPE_TEX_MIPFILTER_*.
+
+The texture mip filter modes are:
+
+* ``PIPE_TEX_MIPFILTER_NEAREST``: A single mipmap level/image is selected
+  according to the texture LOD (lambda) value.
+* ``PIPE_TEX_MIPFILTER_LINEAR``: The two mipmap levels/images above/below
+  the texture LOD value are sampled from.  The results of sampling from
+  those two images are blended together with linear interpolation.
+* ``PIPE_TEX_MIPFILTER_NONE``: Mipmap filtering is disabled.  All texels
+  are taken from the level 0 image.
+
+
+compare_mode
+    If set to PIPE_TEX_COMPARE_R_TO_TEXTURE, the result of texture sampling
+    is not a color but a true/false value which is the result of comparing the
+    sampled texture value (typically a Z value from a depth texture) to the
+    texture coordinate's R component.
+    If set to PIPE_TEX_COMPARE_NONE, no comparison calculation is performed.
+compare_func
+    The inequality operator used when compare_mode=1.  One of PIPE_FUNC_x.
+normalized_coords
+    If set, the incoming texture coordinates (nominally in the range [0,1])
+    will be scaled by the texture width, height, depth to compute texel
+    addresses.  Otherwise, the texture coords are used as-is (they are not
+    scaled by the texture dimensions).
+    When normalized_coords=0, only a subset of the texture wrap modes are
+    allowed: PIPE_TEX_WRAP_CLAMP, PIPE_TEX_WRAP_CLAMP_TO_EDGE and
+    PIPE_TEX_WRAP_CLAMP_TO_BORDER.
+lod_bias
+    Bias factor which is added to the computed level of detail.
+    The normal level of detail is computed from the partial derivatives of
+    the texture coordinates and/or the fragment shader TEX/TXB/TXL
+    instruction.
+min_lod
+    Minimum level of detail, used to clamp LOD after bias.  The LOD values
+    correspond to mipmap levels where LOD=0 is the level 0 mipmap image.
+max_lod
+    Maximum level of detail, used to clamp LOD after bias.
+border_color
+    RGBA color used for texel coordinates that are outside the [0,width-1],
+    [0, height-1] or [0, depth-1] ranges.
+max_anisotropy
+    Maximum anisotropy ratio to use when sampling from textures.  For example,
+    if max_anisotropy=4, a region of up to 1 by 4 texels will be sampled.
+    Set to zero to disable anisotropic filtering.  Any other setting enables
+    anisotropic filtering; however, some drivers may only change their
+    filtering with a setting of 2 or higher.
diff --git a/src/gallium/docs/source/cso/shader.rst b/src/gallium/docs/source/cso/shader.rst
new file mode 100644
index 0000000000..0ee42c8787
--- /dev/null
+++ b/src/gallium/docs/source/cso/shader.rst
@@ -0,0 +1,12 @@
+.. _shader:
+
+Shader
+======
+
+One of the two types of shaders supported by Gallium.
+
+Members
+-------
+
+tokens
+    A list of tgsi_tokens.
diff --git a/src/gallium/docs/source/cso/velems.rst b/src/gallium/docs/source/cso/velems.rst
new file mode 100644
index 0000000000..92cde014fb
--- /dev/null
+++ b/src/gallium/docs/source/cso/velems.rst
@@ -0,0 +1,24 @@
+.. _vertexelements:
+
+Vertex Elements
+===============
+
+This state controls the format, etc., of the input attributes contained
+in the pipe_vertex_buffer(s). There's one pipe_vertex_element array member
+for each input attribute.
+
+Members
+-------
+
+src_offset
+    The byte offset of the attribute in the buffer given by
+    vertex_buffer_index for the first vertex.
+instance_divisor
+    The instance data rate divisor, used for instancing.
+    0 means this is per-vertex data, n means per-instance data used for
+    n consecutive instances (n > 0).
+vertex_buffer_index
+    The vertex buffer this attribute lives in. Several attributes may
+    live in the same vertex buffer.
+src_format
+    The format of the attribute data. One of the PIPE_FORMAT tokens.
diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst
new file mode 100644
index 0000000000..6ba5a056f4
--- /dev/null
+++ b/src/gallium/docs/source/distro.rst
@@ -0,0 +1,187 @@
+Distribution
+============
+
+Along with the interface definitions, the following drivers, state trackers,
+and auxiliary modules are shipped in the standard Gallium distribution.
+
+Drivers
+-------
+
+Cell
+^^^^
+
+Simple driver for the IBM Cell architecture. Runs faster than :ref:`softpipe`
+on Cell-based machines.
+
+Failover
+^^^^^^^^
+
+Broken and deprecated.
+
+Intel i915
+^^^^^^^^^^
+
+Driver for Intel i915 and i945 chipsets.
+
+Intel i965
+^^^^^^^^^^
+
+Highly experimental driver for Intel i965 chipsets.
+
+Identity
+^^^^^^^^
+
+Wrapper driver. The identity driver is a simple skeleton that passes through
+all of its :ref:`Context` and :ref:`Screen` methods to an underlying Context
+and Screen, and as such, it is an excellent starting point for new drivers.
+
+LLVM Softpipe
+^^^^^^^^^^^^^
+
+A version of :ref:`softpipe` that uses the Low-Level Virtual Machine to
+dynamically generate optimized rasterizing pipelines.
+
+nVidia nvfx
+^^^^^^^^^^^
+
+Driver for the nVidia nv30 and nv40 families of GPUs.
+
+nVidia nv50
+^^^^^^^^^^^
+
+Driver for the nVidia nv50 family of GPUs.
+
+VMWare SVGA
+^^^^^^^^^^^
+
+Driver for VMWare virtualized guest operating system graphics processing.
+
+ATI r300
+^^^^^^^^
+
+Driver for the ATI/AMD r300, r400, and r500 families of GPUs.
+
+.. _softpipe:
+
+Softpipe
+^^^^^^^^
+
+Reference software rasterizer. Slow but accurate.
+
+Trace
+^^^^^
+
+Wrapper driver. Trace dumps an XML record of the calls made to the
+:ref:`Context` and :ref:`Screen` objects that it wraps.
+
+State Trackers
+--------------
+
+.. _dri:
+
+Direct Rendering Infrastructure
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Tracker that implements the client-side DRI protocol, for providing direct
+acceleration services to X11 servers with the DRI extension. Supports DRI1
+and DRI2. Only GL is supported.
+
+.. _egl:
+
+EGL
+^^^
+
+Tracker for the Khronos EGL standard, used to set up GL and GLES contexts
+without extra knowledge of the underlying windowing system.
+
+GLX
+^^^
+
+MesaGL
+^^^^^^
+
+Tracker implementing a GL state machine. Not usable as a standalone tracker;
+Mesa should be built with another state tracker, such as :ref:`DRI` or
+:ref:`EGL`.
+
+Python
+^^^^^^
+
+OpenVG
+^^^^^^
+
+WGL
+^^^
+
+Xorg/XFree86 DDX
+^^^^^^^^^^^^^^^^
+
+Tracker for XFree86 and Xorg X11 servers. Provides device-dependent
+modesetting and acceleration as a DDX driver.
+
+Auxiliary
+---------
+
+OS
+^^
+
+The OS module contains the abstractions for basic operating system services:
+
+* memory allocation
+* simple message logging
+* obtaining run-time configuration options
+* threading primitives
+
+This is the bare minimum required to port Gallium to a new platform.
+
+The OS module already provides the implementations of these abstractions for
+the most common platforms.  When targeting an embedded platform, no
+implementation will be provided -- one must be provided separately.
+
+CSO Cache
+^^^^^^^^^
+
+The CSO cache is used to accelerate preparation of state by saving
+driver-specific state structures for later use.
+
+.. _draw:
+
+Draw
+^^^^
+
+Draw is a software :term:`TCL` pipeline for hardware that lacks vertex shaders
+or other essential parts of pre-rasterization vertex preparation.
+
+Gallivm
+^^^^^^^
+
+Indices
+^^^^^^^
+
+Indices provides tools for translating or generating element indices for
+use with element-based rendering.
+
+Pipe Buffer Managers
+^^^^^^^^^^^^^^^^^^^^
+
+Each of these managers provides various services to drivers that are not
+fully utilizing a memory manager.
+
+Remote Debugger
+^^^^^^^^^^^^^^^
+
+Runtime Assembly Emission
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+TGSI
+^^^^
+
+The TGSI auxiliary module provides basic utilities for manipulating TGSI
+streams.
+
+Translate
+^^^^^^^^^
+
+Util
+^^^^
+
diff --git a/src/gallium/docs/source/exts/tgsi.py b/src/gallium/docs/source/exts/tgsi.py
new file mode 100644
index 0000000000..e92cd5c4d1
--- /dev/null
+++ b/src/gallium/docs/source/exts/tgsi.py
@@ -0,0 +1,17 @@
+# tgsi.py
+# Sphinx extension providing formatting for TGSI opcodes
+# (c) Corbin Simpson 2010
+
+import docutils.nodes
+import sphinx.addnodes
+
+def parse_opcode(env, sig, signode):  # signature parser registered for the "opcode" directive
+    opcode, desc = sig.split("-", 1)  # "NAME - Description": split on the first dash only; no dash -> ValueError
+    opcode = opcode.strip().upper()  # opcode names are normalized to upper case
+    desc = " (%s)" % desc.strip()  # description is shown in parentheses after the name
+    signode += sphinx.addnodes.desc_name(opcode, opcode)  # the opcode name node
+    signode += sphinx.addnodes.desc_annotation(desc, desc)  # the parenthesized description node
+    return opcode  # the normalized opcode name is the return value
+
+def setup(app):  # Sphinx extension entry point; this module is listed in conf.py's extensions
+    app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)", parse_opcode)  # provides ".. opcode::" and ":opcode:"
diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst
new file mode 100644
index 0000000000..acde56eafc
--- /dev/null
+++ b/src/gallium/docs/source/glossary.rst
@@ -0,0 +1,27 @@
+Glossary
+========
+
+.. glossary::
+   :sorted:
+
+   MSAA
+      Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes
+      multiple samples of the depth buffer, and uses this information to
+      smooth the edges of polygons.
+
+   TCL
+      Transform, Clipping, & Lighting. The three stages of preparation in a
+      rasterizing pipeline prior to the actual rasterization of vertices into
+      fragments.
+
+   NPOT
+      Non-power-of-two. Usually applied to textures which have at least one
+      dimension which is not a power of two.
+
+   LOD
+      Level of Detail. Also spelled "LoD." The value that determines when the
+      switches between mipmaps occur during texture sampling.
+
+   GLSL
+      GL Shading Language. The official, common high-level shader language used
+      in GL 2.0 and above.
diff --git a/src/gallium/docs/source/index.rst b/src/gallium/docs/source/index.rst
new file mode 100644
index 0000000000..54bc883fce
--- /dev/null
+++ b/src/gallium/docs/source/index.rst
@@ -0,0 +1,28 @@
+.. Gallium documentation master file, created by
+   sphinx-quickstart on Sun Dec 20 14:09:05 2009.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Gallium's documentation!
+===================================
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   intro
+   tgsi
+   screen
+   context
+   cso
+   distro
+   glossary
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/src/gallium/docs/source/intro.rst b/src/gallium/docs/source/intro.rst
new file mode 100644
index 0000000000..1ea103840a
--- /dev/null
+++ b/src/gallium/docs/source/intro.rst
@@ -0,0 +1,9 @@
+Introduction
+============
+
+What is Gallium?
+----------------
+
+Gallium is essentially an API for writing graphics drivers in a largely
+device-agnostic fashion. It provides several objects which encapsulate the
+core services of graphics hardware in a straightforward manner.
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
new file mode 100644
index 0000000000..48d9d570b6
--- /dev/null
+++ b/src/gallium/docs/source/screen.rst
@@ -0,0 +1,279 @@
+.. _screen:
+
+Screen
+======
+
+A screen is an object representing the context-independent part of a device.
+
+Flags and enumerations
+----------------------
+
+XXX some of these don't belong in this section.
+
+
+.. _pipe_cap:
+
+PIPE_CAP_*
+^^^^^^^^^^
+
+Capability queries return information about the features and limits of the
+driver/GPU.  For floating-point values, use :ref:`get_paramf`, and for boolean
+or integer values, use :ref:`get_param`.
+
+The integer capabilities:
+
+* ``MAX_TEXTURE_IMAGE_UNITS``: The maximum number of samplers available.
+* ``NPOT_TEXTURES``: Whether :term:`NPOT` textures may have repeat modes,
+  normalized coordinates, and mipmaps.
+* ``TWO_SIDED_STENCIL``: Whether the stencil test can also affect back-facing
+  polygons.
+* ``GLSL``: Deprecated.
+* ``DUAL_SOURCE_BLEND``: Whether dual-source blend factors are supported. See
+  :ref:`Blend` for more information.
+* ``ANISOTROPIC_FILTER``: Whether textures can be filtered anisotropically.
+* ``POINT_SPRITE``: Whether point sprites are available.
+* ``MAX_RENDER_TARGETS``: The maximum number of render targets that may be
+  bound.
+* ``OCCLUSION_QUERY``: Whether occlusion queries are available.
+* ``TIMER_QUERY``: Whether timer queries are available.
+* ``TEXTURE_SHADOW_MAP``: XXX
+* ``MAX_TEXTURE_2D_LEVELS``: The maximum number of mipmap levels available
+  for a 2D texture.
+* ``MAX_TEXTURE_3D_LEVELS``: The maximum number of mipmap levels available
+  for a 3D texture.
+* ``MAX_TEXTURE_CUBE_LEVELS``: The maximum number of mipmap levels available
+  for a cubemap.
+* ``TEXTURE_MIRROR_CLAMP``: Whether mirrored texture coordinates with clamp
+  are supported.
+* ``TEXTURE_MIRROR_REPEAT``: Whether mirrored repeating texture coordinates
+  are supported.
+* ``MAX_VERTEX_TEXTURE_UNITS``: The maximum number of samplers addressable
+  inside the vertex shader. If this is 0, then the vertex shader cannot
+  sample textures.
+* ``TGSI_CONT_SUPPORTED``: Whether the TGSI CONT opcode is supported.
+* ``BLEND_EQUATION_SEPARATE``: Whether alpha blend equations may be different
+  from color blend equations, in :ref:`Blend` state.
+* ``SM3``: Whether the vertex shader and fragment shader support equivalent
+  opcodes to the Shader Model 3 specification. XXX oh god this is horrible
+* ``MAX_PREDICATE_REGISTERS``: XXX
+* ``MAX_COMBINED_SAMPLERS``: The total number of samplers accessible from
+  the vertex and fragment shader, inclusive.
+* ``MAX_CONST_BUFFERS``: Maximum number of constant buffers that can be bound
+  to any shader stage using ``set_constant_buffer``. If 0 or 1, the pipe will
+  only permit binding one constant buffer per shader, and the shaders will
+  not permit two-dimensional access to constants.
+
+If a value greater than 1 is returned, the driver can have multiple
+constant buffers bound to shader stages. The CONST register file can
+be accessed with two-dimensional indices, like in the example below::
+
+    DCL CONST[0][0..7]       # declare first 8 vectors of constbuf 0
+    DCL CONST[3][0]          # declare first vector of constbuf 3
+    MOV OUT[0], CONST[0][3]  # copy vector 3 of constbuf 0
+
+For backwards compatibility, one-dimensional access to CONST register
+file is still supported. In that case, the constbuf index is assumed
+to be 0.
+
+* ``MAX_CONST_BUFFER_SIZE``: Maximum byte size of a single constant buffer.
+* ``INDEP_BLEND_ENABLE``: Whether per-rendertarget blend enabling and channel
+  masks are supported. If 0, then the first rendertarget's blend mask is
+  replicated across all MRTs.
+* ``INDEP_BLEND_FUNC``: Whether per-rendertarget blend functions are
+  available. If 0, then the first rendertarget's blend functions affect all
+  MRTs.
+* ``PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT``: Whether the TGSI property
+  FS_COORD_ORIGIN with value UPPER_LEFT is supported.
+* ``PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT``: Whether the TGSI property
+  FS_COORD_ORIGIN with value LOWER_LEFT is supported.
+* ``PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER``: Whether the TGSI
+  property FS_COORD_PIXEL_CENTER with value HALF_INTEGER is supported.
+* ``PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER``: Whether the TGSI
+  property FS_COORD_PIXEL_CENTER with value INTEGER is supported.
+
+The floating-point capabilities:
+
+* ``MAX_LINE_WIDTH``: The maximum width of a regular line.
+* ``MAX_LINE_WIDTH_AA``: The maximum width of a smoothed line.
+* ``MAX_POINT_WIDTH``: The maximum width and height of a point.
+* ``MAX_POINT_WIDTH_AA``: The maximum width and height of a smoothed point.
+* ``MAX_TEXTURE_ANISOTROPY``: The maximum level of anisotropy that can be
+  applied to anisotropically filtered textures.
+* ``MAX_TEXTURE_LOD_BIAS``: The maximum :term:`LOD` bias that may be applied
+  to filtered textures.
+* ``GUARD_BAND_LEFT``, ``GUARD_BAND_TOP``, ``GUARD_BAND_RIGHT``,
+  ``GUARD_BAND_BOTTOM``: XXX
+
+Fragment shader limits:
+
+* ``PIPE_CAP_MAX_FS_INSTRUCTIONS``: The maximum number of instructions.
+* ``PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS``: The maximum number of arithmetic instructions.
+* ``PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS``: The maximum number of texture instructions.
+* ``PIPE_CAP_MAX_FS_TEX_INDIRECTIONS``: The maximum number of texture indirections.
+* ``PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH``: The maximum nested control flow depth.
+* ``PIPE_CAP_MAX_FS_INPUTS``: The maximum number of input registers.
+* ``PIPE_CAP_MAX_FS_CONSTS``: The maximum number of constants.
+* ``PIPE_CAP_MAX_FS_TEMPS``: The maximum number of temporary registers.
+* ``PIPE_CAP_MAX_FS_ADDRS``: The maximum number of address registers.
+* ``PIPE_CAP_MAX_FS_PREDS``: The maximum number of predicate registers.
+
+Vertex shader limits:
+
+* ``PIPE_CAP_MAX_VS_*``: Identical to ``PIPE_CAP_MAX_FS_*``.
+
+
+.. _pipe_bind:
+
+PIPE_BIND_*
+^^^^^^^^^^^
+
+These flags indicate how a resource will be used and are specified at resource
+creation time. Resources may be used in different roles
+during their lifecycle. Bind flags are cumulative and may be combined to create
+a resource which can be used for multiple things.
+Depending on the pipe driver's memory management and these bind flags,
+resources might be created and handled quite differently.
+
+* ``PIPE_BIND_RENDER_TARGET``: A color buffer or pixel buffer which will be
+  rendered to.  Any surface/resource attached to pipe_framebuffer_state::cbufs
+  must have this flag set.
+* ``PIPE_BIND_DEPTH_STENCIL``: A depth (Z) buffer and/or stencil buffer. Any
+  depth/stencil surface/resource attached to pipe_framebuffer_state::zsbuf must
+  have this flag set.
+* ``PIPE_BIND_DISPLAY_TARGET``: A surface that can be presented to the screen.
+  Arguments to pipe_screen::flush_front_buffer must have this flag set.
+* ``PIPE_BIND_SAMPLER_VIEW``: A texture that may be sampled from in a fragment
+  or vertex shader.
+* ``PIPE_BIND_VERTEX_BUFFER``: A vertex buffer.
+* ``PIPE_BIND_INDEX_BUFFER``: A vertex index/element buffer.
+* ``PIPE_BIND_CONSTANT_BUFFER``: A buffer of shader constants.
+* ``PIPE_BIND_TRANSFER_WRITE``: A transfer object which will be written to.
+* ``PIPE_BIND_TRANSFER_READ``: A transfer object which will be read from.
+* ``PIPE_BIND_CUSTOM``:
+* ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer.
+* ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another
+  process.
+
+.. _pipe_usage:
+
+PIPE_USAGE_*
+^^^^^^^^^^^^
+
+The PIPE_USAGE enums are hints about the expected usage pattern of a resource.
+
+* ``PIPE_USAGE_DEFAULT``: Expect many uploads to the resource, intermixed with draws.
+* ``PIPE_USAGE_DYNAMIC``: Expect many uploads to the resource, intermixed with draws.
+* ``PIPE_USAGE_STATIC``: Same as immutable (?)
+* ``PIPE_USAGE_IMMUTABLE``: Resource will not be changed after first upload.
+* ``PIPE_USAGE_STREAM``: Upload will be followed by draw, followed by upload, ...
+
+
+
+PIPE_TEXTURE_GEOM
+^^^^^^^^^^^^^^^^^
+
+These flags are used when querying whether a particular pipe_format is
+supported by the driver (with the `is_format_supported` function).
+Some formats may only be supported for certain kinds of textures.
+For example, a compressed format might only be used for POT textures.
+
+* ``PIPE_TEXTURE_GEOM_NON_SQUARE``: The texture may not be square.
+* ``PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO``: The texture dimensions may not be
+  powers of two.
+
+
+Methods
+-------
+
+XXX to-do
+
+get_name
+^^^^^^^^
+
+Returns an identifying name for the screen.
+
+get_vendor
+^^^^^^^^^^
+
+Returns the screen vendor.
+
+.. _get_param:
+
+get_param
+^^^^^^^^^
+
+Get an integer/boolean screen parameter.
+
+**param** is one of the :ref:`PIPE_CAP` names.
+
+.. _get_paramf:
+
+get_paramf
+^^^^^^^^^^
+
+Get a floating-point screen parameter.
+
+**param** is one of the :ref:`PIPE_CAP` names.
+
+context_create
+^^^^^^^^^^^^^^
+
+Create a pipe_context.
+
+**priv** is private data of the caller, which may be put to various
+unspecified uses, typically to do with implementing swapbuffers
+and/or front-buffer rendering.
+
+is_format_supported
+^^^^^^^^^^^^^^^^^^^
+
+Determine if a resource in the given format can be used in a specific manner.
+
+**format** the resource format
+
+**target** one of the PIPE_TEXTURE_x flags
+
+**sample_count** the number of samples. 0 and 1 mean no multisampling;
+the maximum legal value is 32.
+
+**bindings** is a bitmask of :ref:`PIPE_BIND` flags.
+
+**geom_flags** is a bitmask of PIPE_TEXTURE_GEOM_x flags.
+
+Returns TRUE if all usages can be satisfied.
+
+.. _resource_create:
+
+resource_create
+^^^^^^^^^^^^^^^
+
+Create a new resource from a template.
+The following fields of the pipe_resource must be specified in the template:
+
+target
+
+format
+
+width0
+
+height0
+
+depth0
+
+last_level
+
+nr_samples
+
+usage
+
+bind
+
+flags
+
+
+
+resource_destroy
+^^^^^^^^^^^^^^^^
+
+Destroy a resource. A resource is destroyed if it has no more references.
+
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
new file mode 100644
index 0000000000..ecab7cb809
--- /dev/null
+++ b/src/gallium/docs/source/tgsi.rst
@@ -0,0 +1,1474 @@
+TGSI
+====
+
+TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language
+for describing shaders. Since Gallium is inherently shaderful, shaders are
+an important part of the API. TGSI is the only intermediate representation
+used by all drivers.
+
+Basics
+------
+
+All TGSI instructions, known as *opcodes*, operate on arbitrary-precision
+floating-point four-component vectors. An opcode may have up to one
+destination register, known as *dst*, and between zero and three source
+registers, called *src0* through *src2*, or simply *src* if there is only
+one.
+
+Some instructions, like :opcode:`I2F`, permit re-interpretation of vector
+components as integers. Other instructions permit using registers as
+two-component vectors with double precision; see :ref:`Double Opcodes`.
+
+When an instruction has a scalar result, the result is usually copied into
+each of the components of *dst*. When this happens, the result is said to be
+*replicated* to *dst*. :opcode:`RCP` is one such instruction.
+
+Instruction Set
+---------------
+
+Core ISA
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These opcodes are guaranteed to be available regardless of the driver being
+used.
+
+.. opcode:: ARL - Address Register Load
+
+.. math::
+
+  dst.x = \lfloor src.x\rfloor
+
+  dst.y = \lfloor src.y\rfloor
+
+  dst.z = \lfloor src.z\rfloor
+
+  dst.w = \lfloor src.w\rfloor
+
+
+.. opcode:: MOV - Move
+
+.. math::
+
+  dst.x = src.x
+
+  dst.y = src.y
+
+  dst.z = src.z
+
+  dst.w = src.w
+
+
+.. opcode:: LIT - Light Coefficients
+
+.. math::
+
+  dst.x = 1
+
+  dst.y = max(src.x, 0)
+
+  dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
+
+  dst.w = 1
+
+
+.. opcode:: RCP - Reciprocal
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = \frac{1}{src.x}
+
+
+.. opcode:: RSQ - Reciprocal Square Root
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = \frac{1}{\sqrt{|src.x|}}
+
+
+.. opcode:: EXP - Approximate Exponential Base 2
+
+.. math::
+
+  dst.x = 2^{\lfloor src.x\rfloor}
+
+  dst.y = src.x - \lfloor src.x\rfloor
+
+  dst.z = 2^{src.x}
+
+  dst.w = 1
+
+
+.. opcode:: LOG - Approximate Logarithm Base 2
+
+.. math::
+
+  dst.x = \lfloor\log_2{|src.x|}\rfloor
+
+  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
+
+  dst.z = \log_2{|src.x|}
+
+  dst.w = 1
+
+
+.. opcode:: MUL - Multiply
+
+.. math::
+
+  dst.x = src0.x \times src1.x
+
+  dst.y = src0.y \times src1.y
+
+  dst.z = src0.z \times src1.z
+
+  dst.w = src0.w \times src1.w
+
+
+.. opcode:: ADD - Add
+
+.. math::
+
+  dst.x = src0.x + src1.x
+
+  dst.y = src0.y + src1.y
+
+  dst.z = src0.z + src1.z
+
+  dst.w = src0.w + src1.w
+
+
+.. opcode:: DP3 - 3-component Dot Product
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+
+
+.. opcode:: DP4 - 4-component Dot Product
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
+
+
+.. opcode:: DST - Distance Vector
+
+.. math::
+
+  dst.x = 1
+
+  dst.y = src0.y \times src1.y
+
+  dst.z = src0.z
+
+  dst.w = src1.w
+
+
+.. opcode:: MIN - Minimum
+
+.. math::
+
+  dst.x = min(src0.x, src1.x)
+
+  dst.y = min(src0.y, src1.y)
+
+  dst.z = min(src0.z, src1.z)
+
+  dst.w = min(src0.w, src1.w)
+
+
+.. opcode:: MAX - Maximum
+
+.. math::
+
+  dst.x = max(src0.x, src1.x)
+
+  dst.y = max(src0.y, src1.y)
+
+  dst.z = max(src0.z, src1.z)
+
+  dst.w = max(src0.w, src1.w)
+
+
+.. opcode:: SLT - Set On Less Than
+
+.. math::
+
+  dst.x = (src0.x < src1.x) ? 1 : 0
+
+  dst.y = (src0.y < src1.y) ? 1 : 0
+
+  dst.z = (src0.z < src1.z) ? 1 : 0
+
+  dst.w = (src0.w < src1.w) ? 1 : 0
+
+
+.. opcode:: SGE - Set On Greater Equal Than
+
+.. math::
+
+  dst.x = (src0.x >= src1.x) ? 1 : 0
+
+  dst.y = (src0.y >= src1.y) ? 1 : 0
+
+  dst.z = (src0.z >= src1.z) ? 1 : 0
+
+  dst.w = (src0.w >= src1.w) ? 1 : 0
+
+
+.. opcode:: MAD - Multiply And Add
+
+.. math::
+
+  dst.x = src0.x \times src1.x + src2.x
+
+  dst.y = src0.y \times src1.y + src2.y
+
+  dst.z = src0.z \times src1.z + src2.z
+
+  dst.w = src0.w \times src1.w + src2.w
+
+
+.. opcode:: SUB - Subtract
+
+.. math::
+
+  dst.x = src0.x - src1.x
+
+  dst.y = src0.y - src1.y
+
+  dst.z = src0.z - src1.z
+
+  dst.w = src0.w - src1.w
+
+
+.. opcode:: LRP - Linear Interpolate
+
+.. math::
+
+  dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x
+
+  dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y
+
+  dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z
+
+  dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
+
+
+.. opcode:: CND - Condition
+
+.. math::
+
+  dst.x = (src2.x > 0.5) ? src0.x : src1.x
+
+  dst.y = (src2.y > 0.5) ? src0.y : src1.y
+
+  dst.z = (src2.z > 0.5) ? src0.z : src1.z
+
+  dst.w = (src2.w > 0.5) ? src0.w : src1.w
+
+
+.. opcode:: DP2A - 2-component Dot Product And Add
+
+.. math::
+
+  dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
+
+  dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
+
+  dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
+
+  dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
+
+
+.. opcode:: FRC - Fraction
+
+.. math::
+
+  dst.x = src.x - \lfloor src.x\rfloor
+
+  dst.y = src.y - \lfloor src.y\rfloor
+
+  dst.z = src.z - \lfloor src.z\rfloor
+
+  dst.w = src.w - \lfloor src.w\rfloor
+
+
+.. opcode:: CLAMP - Clamp
+
+.. math::
+
+  dst.x = clamp(src0.x, src1.x, src2.x)
+
+  dst.y = clamp(src0.y, src1.y, src2.y)
+
+  dst.z = clamp(src0.z, src1.z, src2.z)
+
+  dst.w = clamp(src0.w, src1.w, src2.w)
+
+
+.. opcode:: FLR - Floor
+
+This is identical to :opcode:`ARL`.
+
+.. math::
+
+  dst.x = \lfloor src.x\rfloor
+
+  dst.y = \lfloor src.y\rfloor
+
+  dst.z = \lfloor src.z\rfloor
+
+  dst.w = \lfloor src.w\rfloor
+
+
+.. opcode:: ROUND - Round
+
+.. math::
+
+  dst.x = round(src.x)
+
+  dst.y = round(src.y)
+
+  dst.z = round(src.z)
+
+  dst.w = round(src.w)
+
+
+.. opcode:: EX2 - Exponential Base 2
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = 2^{src.x}
+
+
+.. opcode:: LG2 - Logarithm Base 2
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = \log_2{src.x}
+
+
+.. opcode:: POW - Power
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = src0.x^{src1.x}
+
+.. opcode:: XPD - Cross Product
+
+.. math::
+
+  dst.x = src0.y \times src1.z - src1.y \times src0.z
+
+  dst.y = src0.z \times src1.x - src1.z \times src0.x
+
+  dst.z = src0.x \times src1.y - src1.x \times src0.y
+
+  dst.w = 1
+
+
+.. opcode:: ABS - Absolute
+
+.. math::
+
+  dst.x = |src.x|
+
+  dst.y = |src.y|
+
+  dst.z = |src.z|
+
+  dst.w = |src.w|
+
+
+.. opcode:: RCC - Reciprocal Clamped
+
+This instruction replicates its result.
+
+XXX cleanup on aisle three
+
+.. math::
+
+  dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
+
+
+.. opcode:: DPH - Homogeneous Dot Product
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
+
+
+.. opcode:: COS - Cosine
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = \cos{src.x}
+
+
+.. opcode:: DDX - Derivative Relative To X
+
+.. math::
+
+  dst.x = partialx(src.x)
+
+  dst.y = partialx(src.y)
+
+  dst.z = partialx(src.z)
+
+  dst.w = partialx(src.w)
+
+
+.. opcode:: DDY - Derivative Relative To Y
+
+.. math::
+
+  dst.x = partialy(src.x)
+
+  dst.y = partialy(src.y)
+
+  dst.z = partialy(src.z)
+
+  dst.w = partialy(src.w)
+
+
+.. opcode:: KILP - Predicated Discard
+
+  discard
+
+
+.. opcode:: PK2H - Pack Two 16-bit Floats
+
+  TBD
+
+
+.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
+
+  TBD
+
+
+.. opcode:: PK4B - Pack Four Signed 8-bit Scalars
+
+  TBD
+
+
+.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
+
+  TBD
+
+
+.. opcode:: RFL - Reflection Vector
+
+.. math::
+
+  dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x
+
+  dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y
+
+  dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z
+
+  dst.w = 1
+
+.. note::
+
+   Considered for removal.
+
+
+.. opcode:: SEQ - Set On Equal
+
+.. math::
+
+  dst.x = (src0.x == src1.x) ? 1 : 0
+
+  dst.y = (src0.y == src1.y) ? 1 : 0
+
+  dst.z = (src0.z == src1.z) ? 1 : 0
+
+  dst.w = (src0.w == src1.w) ? 1 : 0
+
+
+.. opcode:: SFL - Set On False
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = 0
+
+.. note::
+
+   Considered for removal.
+
+
+.. opcode:: SGT - Set On Greater Than
+
+.. math::
+
+  dst.x = (src0.x > src1.x) ? 1 : 0
+
+  dst.y = (src0.y > src1.y) ? 1 : 0
+
+  dst.z = (src0.z > src1.z) ? 1 : 0
+
+  dst.w = (src0.w > src1.w) ? 1 : 0
+
+
+.. opcode:: SIN - Sine
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = \sin{src.x}
+
+
+.. opcode:: SLE - Set On Less Than Or Equal
+
+.. math::
+
+  dst.x = (src0.x <= src1.x) ? 1 : 0
+
+  dst.y = (src0.y <= src1.y) ? 1 : 0
+
+  dst.z = (src0.z <= src1.z) ? 1 : 0
+
+  dst.w = (src0.w <= src1.w) ? 1 : 0
+
+
+.. opcode:: SNE - Set On Not Equal
+
+.. math::
+
+  dst.x = (src0.x != src1.x) ? 1 : 0
+
+  dst.y = (src0.y != src1.y) ? 1 : 0
+
+  dst.z = (src0.z != src1.z) ? 1 : 0
+
+  dst.w = (src0.w != src1.w) ? 1 : 0
+
+
+.. opcode:: STR - Set On True
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = 1
+
+
+.. opcode:: TEX - Texture Lookup
+
+  TBD
+
+
+.. opcode:: TXD - Texture Lookup with Derivatives
+
+  TBD
+
+
+.. opcode:: TXP - Projective Texture Lookup
+
+  TBD
+
+
+.. opcode:: UP2H - Unpack Two 16-Bit Floats
+
+  TBD
+
+.. note::
+
+   Considered for removal.
+
+.. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
+
+  TBD
+
+.. note::
+
+   Considered for removal.
+
+.. opcode:: UP4B - Unpack Four Signed 8-Bit Values
+
+  TBD
+
+.. note::
+
+   Considered for removal.
+
+.. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
+
+  TBD
+
+.. note::
+
+   Considered for removal.
+
+.. opcode:: X2D - 2D Coordinate Transformation
+
+.. math::
+
+  dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
+
+  dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
+
+  dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
+
+  dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
+
+.. note::
+
+   Considered for removal.
+
+
+.. opcode:: ARA - Address Register Add
+
+  TBD
+
+.. note::
+
+   Considered for removal.
+
+.. opcode:: ARR - Address Register Load With Round
+
+.. math::
+
+  dst.x = round(src.x)
+
+  dst.y = round(src.y)
+
+  dst.z = round(src.z)
+
+  dst.w = round(src.w)
+
+
+.. opcode:: BRA - Branch
+
+  pc = target
+
+.. note::
+
+   Considered for removal.
+
+.. opcode:: CAL - Subroutine Call
+
+  push(pc)
+  pc = target
+
+
+.. opcode:: RET - Subroutine Call Return
+
+  pc = pop()
+
+  Potential restrictions:  
+  * Only occurs at end of function.
+
+.. opcode:: SSG - Set Sign
+
+.. math::
+
+  dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
+
+  dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
+
+  dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
+
+  dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
+
+
+.. opcode:: CMP - Compare
+
+.. math::
+
+  dst.x = (src0.x < 0) ? src1.x : src2.x
+
+  dst.y = (src0.y < 0) ? src1.y : src2.y
+
+  dst.z = (src0.z < 0) ? src1.z : src2.z
+
+  dst.w = (src0.w < 0) ? src1.w : src2.w
+
+
+.. opcode:: KIL - Conditional Discard
+
+.. math::
+
+  if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
+    discard
+  endif
+
+
+.. opcode:: SCS - Sine Cosine
+
+.. math::
+
+  dst.x = \cos{src.x}
+
+  dst.y = \sin{src.x}
+
+  dst.z = 0
+
+  dst.w = 1
+
+
+.. opcode:: TXB - Texture Lookup With Bias
+
+  TBD
+
+
+.. opcode:: NRM - 3-component Vector Normalise
+
+.. math::
+
+  dst.x = src.x / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
+
+  dst.y = src.y / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
+
+  dst.z = src.z / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
+
+  dst.w = 1
+
+
+.. opcode:: DIV - Divide
+
+.. math::
+
+  dst.x = \frac{src0.x}{src1.x}
+
+  dst.y = \frac{src0.y}{src1.y}
+
+  dst.z = \frac{src0.z}{src1.z}
+
+  dst.w = \frac{src0.w}{src1.w}
+
+
+.. opcode:: DP2 - 2-component Dot Product
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = src0.x \times src1.x + src0.y \times src1.y
+
+
+.. opcode:: TXL - Texture Lookup With LOD
+
+  TBD
+
+
+.. opcode:: BRK - Break
+
+  TBD
+
+
+.. opcode:: IF - If
+
+  TBD
+
+
+.. opcode:: ELSE - Else
+
+  TBD
+
+
+.. opcode:: ENDIF - End If
+
+  TBD
+
+
+.. opcode:: PUSHA - Push Address Register On Stack
+
+  push(src.x)
+  push(src.y)
+  push(src.z)
+  push(src.w)
+
+.. note::
+
+   Considered for cleanup.
+
+.. note::
+
+   Considered for removal.
+
+.. opcode:: POPA - Pop Address Register From Stack
+
+  dst.w = pop()
+  dst.z = pop()
+  dst.y = pop()
+  dst.x = pop()
+
+.. note::
+
+   Considered for cleanup.
+
+.. note::
+
+   Considered for removal.
+
+
+Compute ISA
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+These opcodes are primarily provided for special-use computational shaders.
+Support for these opcodes is indicated by a special pipe capability bit (TBD).
+
+XXX so let's discuss it, yeah?
+
+.. opcode:: CEIL - Ceiling
+
+.. math::
+
+  dst.x = \lceil src.x\rceil
+
+  dst.y = \lceil src.y\rceil
+
+  dst.z = \lceil src.z\rceil
+
+  dst.w = \lceil src.w\rceil
+
+
+.. opcode:: I2F - Integer To Float
+
+.. math::
+
+  dst.x = (float) src.x
+
+  dst.y = (float) src.y
+
+  dst.z = (float) src.z
+
+  dst.w = (float) src.w
+
+
+.. opcode:: NOT - Bitwise Not
+
+.. math::
+
+  dst.x = \sim src.x
+
+  dst.y = \sim src.y
+
+  dst.z = \sim src.z
+
+  dst.w = \sim src.w
+
+
+.. opcode:: TRUNC - Truncate
+
+.. math::
+
+  dst.x = trunc(src.x)
+
+  dst.y = trunc(src.y)
+
+  dst.z = trunc(src.z)
+
+  dst.w = trunc(src.w)
+
+
+.. opcode:: SHL - Shift Left
+
+.. math::
+
+  dst.x = src0.x << src1.x
+
+  dst.y = src0.y << src1.x
+
+  dst.z = src0.z << src1.x
+
+  dst.w = src0.w << src1.x
+
+
+.. opcode:: SHR - Shift Right
+
+.. math::
+
+  dst.x = src0.x >> src1.x
+
+  dst.y = src0.y >> src1.x
+
+  dst.z = src0.z >> src1.x
+
+  dst.w = src0.w >> src1.x
+
+
+.. opcode:: AND - Bitwise And
+
+.. math::
+
+  dst.x = src0.x \& src1.x
+
+  dst.y = src0.y \& src1.y
+
+  dst.z = src0.z \& src1.z
+
+  dst.w = src0.w \& src1.w
+
+
+.. opcode:: OR - Bitwise Or
+
+.. math::
+
+  dst.x = src0.x | src1.x
+
+  dst.y = src0.y | src1.y
+
+  dst.z = src0.z | src1.z
+
+  dst.w = src0.w | src1.w
+
+
+.. opcode:: MOD - Modulus
+
+.. math::
+
+  dst.x = src0.x \bmod src1.x
+
+  dst.y = src0.y \bmod src1.y
+
+  dst.z = src0.z \bmod src1.z
+
+  dst.w = src0.w \bmod src1.w
+
+
+.. opcode:: XOR - Bitwise Xor
+
+.. math::
+
+  dst.x = src0.x \oplus src1.x
+
+  dst.y = src0.y \oplus src1.y
+
+  dst.z = src0.z \oplus src1.z
+
+  dst.w = src0.w \oplus src1.w
+
+
+.. opcode:: SAD - Sum Of Absolute Differences
+
+.. math::
+
+  dst.x = |src0.x - src1.x| + src2.x
+
+  dst.y = |src0.y - src1.y| + src2.y
+
+  dst.z = |src0.z - src1.z| + src2.z
+
+  dst.w = |src0.w - src1.w| + src2.w
+
+
+.. opcode:: TXF - Texel Fetch
+
+  TBD
+
+
+.. opcode:: TXQ - Texture Size Query
+
+  TBD
+
+
+.. opcode:: CONT - Continue
+
+  TBD
+
+.. note::
+
+   Support for CONT is determined by a special capability bit,
+   ``TGSI_CONT_SUPPORTED``. See :ref:`Screen` for more information.
+
+
+Geometry ISA
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These opcodes are only supported in geometry shaders; they have no meaning
+in any other type of shader.
+
+.. opcode:: EMIT - Emit
+
+  TBD
+
+
+.. opcode:: ENDPRIM - End Primitive
+
+  TBD
+
+
+GLSL ISA
+^^^^^^^^^^
+
+These opcodes are part of :term:`GLSL`'s opcode set. Support for these
+opcodes is determined by a special capability bit, ``GLSL``.
+
+.. opcode:: BGNLOOP - Begin a Loop
+
+  TBD
+
+
+.. opcode:: BGNSUB - Begin Subroutine
+
+  TBD
+
+
+.. opcode:: ENDLOOP - End a Loop
+
+  TBD
+
+
+.. opcode:: ENDSUB - End Subroutine
+
+  TBD
+
+
+.. opcode:: NOP - No Operation
+
+  Do nothing.
+
+
+.. opcode:: NRM4 - 4-component Vector Normalise
+
+This instruction replicates its result.
+
+.. math::
+
+  dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+
+
+ps_2_x
+^^^^^^^^^^^^
+
+XXX wait what
+
+.. opcode:: CALLNZ - Subroutine Call If Not Zero
+
+  TBD
+
+
+.. opcode:: IFC - If
+
+  TBD
+
+
+.. opcode:: BREAKC - Break Conditional
+
+  TBD
+
+.. _doubleopcodes:
+
+Double ISA
+^^^^^^^^^^^^^^^
+
+The double-precision opcodes reinterpret four-component vectors into
+two-component vectors with doubled precision in each component.
+
+Support for these opcodes is XXX undecided. :T
+
+.. opcode:: DADD - Add
+
+.. math::
+
+  dst.xy = src0.xy + src1.xy
+
+  dst.zw = src0.zw + src1.zw
+
+
+.. opcode:: DDIV - Divide
+
+.. math::
+
+  dst.xy = src0.xy / src1.xy
+
+  dst.zw = src0.zw / src1.zw
+
+.. opcode:: DSEQ - Set on Equal
+
+.. math::
+
+  dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F
+
+  dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
+
+.. opcode:: DSLT - Set on Less than
+
+.. math::
+
+  dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F
+
+  dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
+
+.. opcode:: DFRAC - Fraction
+
+.. math::
+
+  dst.xy = src.xy - \lfloor src.xy\rfloor
+
+  dst.zw = src.zw - \lfloor src.zw\rfloor
+
+
+.. opcode:: DFRACEXP - Convert Number to Fractional and Integral Components
+
+Like the ``frexp()`` routine in many math libraries, this opcode stores the
+exponent of its source to ``dst0``, and the significand to ``dst1``, such that
+:math:`dst1 \times 2^{dst0} = src` .
+
+.. math::
+
+  dst0.xy = exp(src.xy)
+
+  dst1.xy = frac(src.xy)
+
+  dst0.zw = exp(src.zw)
+
+  dst1.zw = frac(src.zw)
+
+.. opcode:: DLDEXP - Multiply Number by Integral Power of 2
+
+This opcode is the inverse of :opcode:`DFRACEXP`.
+
+.. math::
+
+  dst.xy = src0.xy \times 2^{src1.xy}
+
+  dst.zw = src0.zw \times 2^{src1.zw}
+
+.. opcode:: DMIN - Minimum
+
+.. math::
+
+  dst.xy = min(src0.xy, src1.xy)
+
+  dst.zw = min(src0.zw, src1.zw)
+
+.. opcode:: DMAX - Maximum
+
+.. math::
+
+  dst.xy = max(src0.xy, src1.xy)
+
+  dst.zw = max(src0.zw, src1.zw)
+
+.. opcode:: DMUL - Multiply
+
+.. math::
+
+  dst.xy = src0.xy \times src1.xy
+
+  dst.zw = src0.zw \times src1.zw
+
+
+.. opcode:: DMAD - Multiply And Add
+
+.. math::
+
+  dst.xy = src0.xy \times src1.xy + src2.xy
+
+  dst.zw = src0.zw \times src1.zw + src2.zw
+
+
+.. opcode:: DRCP - Reciprocal
+
+.. math::
+
+   dst.xy = \frac{1}{src.xy}
+
+   dst.zw = \frac{1}{src.zw}
+
+.. opcode:: DSQRT - Square Root
+
+.. math::
+
+   dst.xy = \sqrt{src.xy}
+
+   dst.zw = \sqrt{src.zw}
+
+
+Explanation of symbols used
+------------------------------
+
+
+Functions
+^^^^^^^^^^^^^^
+
+
+  :math:`|x|`       Absolute value of `x`.
+
+  :math:`\lceil x \rceil` Ceiling of `x`.
+
+  clamp(x,y,z)      Clamp x between y and z.
+                    (x < y) ? y : (x > z) ? z : x
+
+  :math:`\lfloor x\rfloor` Floor of `x`.
+
+  :math:`\log_2{x}` Logarithm of `x`, base 2.
+
+  max(x,y)          Maximum of x and y.
+                    (x > y) ? x : y
+
+  min(x,y)          Minimum of x and y.
+                    (x < y) ? x : y
+
+  partialx(x)       Derivative of x relative to fragment's X.
+
+  partialy(x)       Derivative of x relative to fragment's Y.
+
+  pop()             Pop from stack.
+
+  :math:`x^y`       `x` to the power `y`.
+
+  push(x)           Push x on stack.
+
+  round(x)          Round x.
+
+  trunc(x)          Truncate x, i.e. drop the fraction bits.
+
+
+Keywords
+^^^^^^^^^^^^^
+
+
+  discard           Discard fragment.
+
+  pc                Program counter.
+
+  target            Label of target instruction.
+
+
+Other tokens
+---------------
+
+
+Declaration
+^^^^^^^^^^^
+
+
+Declares a register that will be referenced as an operand in Instruction
+tokens.
+
+File field contains register file that is being declared and is one
+of TGSI_FILE.
+
+UsageMask field specifies which of the register components can be accessed
+and is one of TGSI_WRITEMASK.
+
+Interpolate field is only valid for fragment shader INPUT register files.
+It specifies the way input is being interpolated by the rasteriser and is one
+of TGSI_INTERPOLATE.
+
+If Dimension flag is set to 1, a Declaration Dimension token follows.
+
+If Semantic flag is set to 1, a Declaration Semantic token follows.
+
+CylindricalWrap bitfield is only valid for fragment shader INPUT register
+files. It specifies which register components should be subject to cylindrical
+wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X
+is set to 1, the X component should be interpolated according to cylindrical
+wrapping rules.
+
+
+Declaration Semantic
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+  Follows Declaration token if Semantic bit is set.
+
+  Since its purpose is to link a shader with other stages of the pipeline,
+  it is valid to follow only those Declaration tokens that declare a register
+  either in INPUT or OUTPUT file.
+
+  SemanticName field contains the semantic name of the register being declared.
+  There is no default value.
+
+  SemanticIndex is an optional subscript that can be used to distinguish
+  different register declarations with the same semantic name. The default value
+  is 0.
+
+  The meanings of the individual semantic names are explained in the following
+  sections.
+
+TGSI_SEMANTIC_POSITION
+""""""""""""""""""""""
+
+Position, sometimes known as HPOS or WPOS for historical reasons, is the
+location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z``
+are the Cartesian coordinates, and ``w`` is the homogeneous coordinate and used
+for the perspective divide, if enabled.
+
+As a vertex shader output, position should be scaled to the viewport. When
+used in fragment shaders, position will be in window coordinates. The convention
+used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties.
+
+XXX additionally, is there a way to configure the perspective divide? it's
+accelerated on most chipsets AFAIK...
+
+Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can
+be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``.
+
+XXX usually? can we solidify that?
+
+TGSI_SEMANTIC_COLOR
+"""""""""""""""""""
+
+Colors are used to, well, color the primitives. Colors are always in
+``(r, g, b, a)`` format.
+
+If alpha is not specified, it defaults to 1.
+
+TGSI_SEMANTIC_BCOLOR
+""""""""""""""""""""
+
+Back-facing colors are only used for back-facing polygons, and are only valid
+in vertex shader outputs. After rasterization, all polygons are front-facing
+and COLOR and BCOLOR end up occupying the same slots in the fragment, so
+all BCOLORs effectively become regular COLORs in the fragment shader.
+
+TGSI_SEMANTIC_FOG
+"""""""""""""""""
+
+The fog coordinate historically has been used to replace the depth coordinate
+for generation of fog in dedicated fog blocks. Gallium, however, does not use
+dedicated fog acceleration, placing it entirely in the fragment shader
+instead.
+
+The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first
+component matters when writing from the vertex shader; the driver will ensure
+that the coordinate is in this format when used as a fragment shader input.
+
+TGSI_SEMANTIC_PSIZE
+"""""""""""""""""""
+
+PSIZE, or point size, is used to specify point sizes per-vertex. It should
+be in ``(s, 0, 0, 1)`` format, where ``s`` is the (possibly clamped) point size.
+Only the first component matters when writing from the vertex shader.
+
+When using this semantic, be sure to set the appropriate state in the
+:ref:`rasterizer` first.
+
+TGSI_SEMANTIC_GENERIC
+"""""""""""""""""""""
+
+Generic semantics are nearly always used for texture coordinate attributes,
+in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds
+of lookups, and ``q`` is the level-of-detail bias for biased sampling.
+
+These attributes are called "generic" because they may be used for anything
+else, including parameters, texture generation information, or anything that
+can be stored inside a four-component vector.
+
+TGSI_SEMANTIC_NORMAL
+""""""""""""""""""""
+
+Vertex normal; could be used to implement per-pixel lighting for legacy APIs
+that allow mixing fixed-function and programmable stages.
+
+TGSI_SEMANTIC_FACE
+""""""""""""""""""
+
+FACE is the facing bit, to store the facing information for the fragment
+shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive
+when the fragment is front-facing, and negative when the fragment is
+back-facing.
+
+TGSI_SEMANTIC_EDGEFLAG
+""""""""""""""""""""""
+
+XXX no clue
+
+
+Properties
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+  Properties are general directives that apply to the whole TGSI program.
+
+FS_COORD_ORIGIN
+"""""""""""""""
+
+Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin.
+The default value is UPPER_LEFT.
+
+If UPPER_LEFT, the position will be (0,0) at the upper left corner and
+increase downward and rightward.
+If LOWER_LEFT, the position will be (0,0) at the lower left corner and
+increase upward and rightward.
+
+OpenGL defaults to LOWER_LEFT, and is configurable with the
+GL_ARB_fragment_coord_conventions extension.
+
+DirectX 9/10 use UPPER_LEFT.
+
+FS_COORD_PIXEL_CENTER
+"""""""""""""""""""""
+
+Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention.
+The default value is HALF_INTEGER.
+
+If HALF_INTEGER, the fractional part of the position will be 0.5.
+If INTEGER, the fractional part of the position will be 0.0.
+
+Note that this does not affect the set of fragments generated by
+rasterization, which is instead controlled by gl_rasterization_rules in the
+rasterizer.
+
+OpenGL defaults to HALF_INTEGER, and is configurable with the
+GL_ARB_fragment_coord_conventions extension.
+
+DirectX 9 uses INTEGER.
+DirectX 10 uses HALF_INTEGER.
+
+
+
+Texture Sampling and Texture Formats
+------------------------------------
+
+This table shows how texture image components are returned as (x,y,z,w) tuples
+by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and
+:opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as
+well.
+
++--------------------+--------------+--------------------+--------------+
+| Texture Components | Gallium      | OpenGL             | Direct3D 9   |
++====================+==============+====================+==============+
+| R                  | (r, 0, 0, 1) | (r, 0, 0, 1)       | (r, 1, 1, 1) |
++--------------------+--------------+--------------------+--------------+
+| RG                 | (r, g, 0, 1) | (r, g, 0, 1)       | (r, g, 1, 1) |
++--------------------+--------------+--------------------+--------------+
+| RGB                | (r, g, b, 1) | (r, g, b, 1)       | (r, g, b, 1) |
++--------------------+--------------+--------------------+--------------+
+| RGBA               | (r, g, b, a) | (r, g, b, a)       | (r, g, b, a) |
++--------------------+--------------+--------------------+--------------+
+| A                  | (0, 0, 0, a) | (0, 0, 0, a)       | (0, 0, 0, a) |
++--------------------+--------------+--------------------+--------------+
+| L                  | (l, l, l, 1) | (l, l, l, 1)       | (l, l, l, 1) |
++--------------------+--------------+--------------------+--------------+
+| LA                 | (l, l, l, a) | (l, l, l, a)       | (l, l, l, a) |
++--------------------+--------------+--------------------+--------------+
+| I                  | (i, i, i, i) | (i, i, i, i)       | N/A          |
++--------------------+--------------+--------------------+--------------+
+| UV                 | XXX TBD      | (0, 0, 0, 1)       | (u, v, 1, 1) |
+|                    |              | [#envmap-bumpmap]_ |              |
++--------------------+--------------+--------------------+--------------+
+| Z                  | XXX TBD      | (z, z, z, 1)       | (0, z, 0, 1) |
+|                    |              | [#depth-tex-mode]_ |              |
++--------------------+--------------+--------------------+--------------+
+
+.. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
+.. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
+   or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE.