/**************************************************************************
 *
 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "pipe/p_context.h"
#include "pipe/p_format.h"
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/u_blitter.h"

#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_screen.h"
#include "nvfx_context.h"
#include "nvfx_screen.h"
#include "nvfx_resource.h"
#include "nv04_2d.h"

#include <nouveau/nouveau_bo.h>

static INLINE void
nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
{
	unsigned bits = util_format_get_blocksizebits(format);
	unsigned shift = 0;
	rgn->one_bits = 0;

	switch(bits)
	{
	case 8:
		rgn->bpps = 0;
		break;
	case 16:
		rgn->bpps = 1;
		if(format == PIPE_FORMAT_B5G5R5X1_UNORM)
			rgn->one_bits = 1;
		break;
	case 32:
		rgn->bpps = 2;
		if(format == PIPE_FORMAT_R8G8B8X8_UNORM || format == PIPE_FORMAT_B8G8R8X8_UNORM)
			rgn->one_bits = 8;
		break;
	case 64:
		rgn->bpps = 2;
		shift = 1;
		break;
	case 128:
		rgn->bpps = 2;
		shift = 2;
		break;
	}

	if(shift) {
		rgn->x = util_format_get_nblocksx(format, rgn->x) << shift;
		rgn->y = util_format_get_nblocksy(format, rgn->y);
		rgn->w <<= shift;
	}
}

static INLINE void
nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write)
{
	rgn->x = x;
	rgn->y = y;
	rgn->z = 0;

	if(surf->temp)
	{
		rgn->bo = surf->temp->base.bo;
		rgn->offset = 0;
		rgn->pitch = surf->temp->linear_pitch;

		if(for_write)
			util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base);
	} else {
		rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo;
		rgn->offset = surf->base.base.offset;

		if(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)
			rgn->pitch = surf->pitch;
	        else
	        {
		        rgn->pitch = 0;
		        rgn->z = surf->base.base.zslice;
		        rgn->w = surf->base.base.width;
		        rgn->h = surf->base.base.height;
		        rgn->d = u_minify(surf->base.base.texture->depth0, surf->base.base.level);
	        }
	}

	nvfx_region_set_format(rgn, surf->base.base.format);
	if(!rgn->pitch)
		nv04_region_try_to_linearize(rgn);
}

static INLINE void
nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write)
{
	if(pt->target != PIPE_BUFFER)
	{
		struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z);
		if(ns && util_dirty_surface_is_dirty(&ns->base))
		{
			nvfx_region_init_for_surface(rgn, ns, x, y, for_write);
			return;
		}
	}

	rgn->bo = ((struct nvfx_resource*)pt)->bo;
	rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z);
	rgn->x = x;
	rgn->y = y;

	if(pt->flags & NVFX_RESOURCE_FLAG_LINEAR)
	{
		rgn->pitch = nvfx_subresource_pitch(pt, sub.level);
		rgn->z = 0;
	}
	else
	{
		rgn->pitch = 0;
		rgn->z = z;
		rgn->w = u_minify(pt->width0, sub.level);
		rgn->h = u_minify(pt->height0, sub.level);
		rgn->d = u_minify(pt->depth0, sub.level);
	}

	nvfx_region_set_format(rgn, pt->format);
	if(!rgn->pitch)
		nv04_region_try_to_linearize(rgn);
}

// don't save index buffer because blitter doesn't setit
static struct blitter_context*
nvfx_get_blitter(struct pipe_context* pipe, int copy)
{
	struct nvfx_context* nvfx = nvfx_context(pipe);
	struct blitter_context** pblitter;
	struct blitter_context* blitter;

	assert(nvfx->blitters_in_use < Elements(nvfx->blitter));

	if(nvfx->query && !nvfx->blitters_in_use)
	{
		struct nouveau_channel* chan = nvfx->screen->base.channel;
		WAIT_RING(chan, 2);
		OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
		OUT_RING(chan, 0);
	}

	pblitter = &nvfx->blitter[nvfx->blitters_in_use++];
	if(!*pblitter)
		*pblitter = util_blitter_create(pipe);
	blitter = *pblitter;

	util_blitter_save_blend(blitter, nvfx->blend);
	util_blitter_save_depth_stencil_alpha(blitter, nvfx->zsa);
	util_blitter_save_stencil_ref(blitter, &nvfx->stencil_ref);
	util_blitter_save_rasterizer(blitter, nvfx->rasterizer);
	util_blitter_save_fragment_shader(blitter, nvfx->fragprog);
	util_blitter_save_vertex_shader(blitter, nvfx->vertprog);
	util_blitter_save_viewport(blitter, &nvfx->viewport);
	util_blitter_save_framebuffer(blitter, &nvfx->framebuffer);
	util_blitter_save_clip(blitter, &nvfx->clip);
	util_blitter_save_vertex_elements(blitter, nvfx->vtxelt);
	util_blitter_save_vertex_buffers(blitter, nvfx->vtxbuf_nr, nvfx->vtxbuf);

	if(copy)
	{
		util_blitter_save_fragment_sampler_states(blitter, nvfx->nr_samplers, (void**)nvfx->tex_sampler);
		util_blitter_save_fragment_sampler_views(blitter, nvfx->nr_textures, nvfx->fragment_sampler_views);
	}

	return blitter;
}

static inline void
nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter)
{
	struct nvfx_context* nvfx = nvfx_context(pipe);
	--nvfx->blitters_in_use;
	assert(nvfx->blitters_in_use >= 0);

	if(nvfx->query && !nvfx->blitters_in_use)
	{
		struct nouveau_channel* chan = nvfx->screen->base.channel;
		WAIT_RING(chan, 2);
		OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
		OUT_RING(chan, 1);
	}
}

static unsigned
nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned w, unsigned h, boolean for_read)
{
	unsigned begin = nv04_region_begin(rgn, w, h);
	unsigned end = nv04_region_end(rgn, w, h);
	unsigned size = end - begin;
	struct nouveau_bo* bo = 0;
	nouveau_bo_new(rgn->bo->device, NOUVEAU_BO_MAP | NOUVEAU_BO_GART, 256, size, &bo);

	if(for_read || (size > ((w * h) << rgn->bpps)))
		nv04_memcpy(ctx, bo, 0, rgn->bo, rgn->offset + begin, size);

	rgn->bo = bo;
	rgn->offset = -begin;
	return begin;
}

static void
nvfx_resource_copy_region(struct pipe_context *pipe,
		  struct pipe_resource *dstr, struct pipe_subresource subdst,
		  unsigned dstx, unsigned dsty, unsigned dstz,
		  struct pipe_resource *srcr, struct pipe_subresource subsrc,
		  unsigned srcx, unsigned srcy, unsigned srcz,
		  unsigned w, unsigned h)
{
	static int copy_threshold = -1;
	struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
	struct nv04_region dst, src;
	int dst_to_gpu;
	int src_on_gpu;
	boolean small;
	int ret;

	if(!w || !h)
		return;

	if(copy_threshold < 0)
		copy_threshold = debug_get_num_option("NOUVEAU_COPY_THRESHOLD", 4);

	dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
	src_on_gpu = nvfx_resource_on_gpu(srcr);

	nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE);
	nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);
	w = util_format_get_stride(dstr->format, w) >> dst.bpps;
	h = util_format_get_nblocksy(dstr->format, h);

	small = (w * h <= copy_threshold);
	if((!dst_to_gpu || !src_on_gpu) && small)
		ret = -1; /* use the CPU */
	else
		ret = nv04_region_copy_2d(ctx, &dst, &src, w, h, dst_to_gpu, src_on_gpu);
	if(!ret)
	{}
	else if(ret > 0
			&& dstr->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)
			&& srcr->bind & PIPE_BIND_SAMPLER_VIEW)
	{
		/* this currently works because we hack the bind flags on resource creation to be
		 * the maximum set that the resource type actually supports
		 *
		 * TODO: perhaps support reinterpreting the formats
		 */
		struct blitter_context* blitter = nvfx_get_blitter(pipe, 1);
		util_blitter_copy_region(blitter, dstr, subdst, dstx, dsty, dstz, srcr, subsrc, srcx, srcy, srcz, w, h, TRUE);
		nvfx_put_blitter(pipe, blitter);
	}
	else
	{
		struct nv04_region dstt = dst;
		struct nv04_region srct = src;
		unsigned dstbegin = 0;

		if(!small)
		{
			if(src_on_gpu)
				nvfx_region_clone(ctx, &srct, w, h, TRUE);

			if(dst_to_gpu)
				dstbegin = nvfx_region_clone(ctx, &dstt, w, h, FALSE);
		}

		nv04_region_copy_cpu(&dstt, &srct, w, h);

		if(srct.bo != src.bo)
			nouveau_screen_bo_release(pipe->screen, srct.bo);

		if(dstt.bo != dst.bo)
		{
			nv04_memcpy(ctx, dst.bo, dst.offset + dstbegin, dstt.bo, 0, dstt.bo->size);
			nouveau_screen_bo_release(pipe->screen, dstt.bo);
		}
	}
}

static int
nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts,
		  unsigned dx, unsigned dy, unsigned w, unsigned h, unsigned value)
{
	struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
	struct nv04_region dst;
	int ret;
	/* Always try to use the GPU right now, if possible
	 * If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */

	// we don't care about interior pixel order since we set all them to the same value
	nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy, TRUE);

	w = util_format_get_stride(dsts->format, w) >> dst.bpps;
	h = util_format_get_nblocksy(dsts->format, h);

	ret = nv04_region_fill_2d(ctx, &dst, w, h, value);
	if(ret > 0 && dsts->texture->bind & PIPE_BIND_RENDER_TARGET)
		return 1;
	else if(ret)
	{
		struct nv04_region dstt = dst;
		unsigned dstbegin = 0;

		if(nvfx_resource_on_gpu(dsts->texture))
			dstbegin = nvfx_region_clone(ctx, &dstt, w, h, FALSE);

		nv04_region_fill_cpu(&dstt, w, h, value);

		if(dstt.bo != dst.bo)
		{
			nv04_memcpy(ctx, dst.bo, dst.offset + dstbegin, dstt.bo, 0, dstt.bo->size);
			nouveau_screen_bo_release(pipe->screen, dstt.bo);
		}
	}

	return 0;
}


void
nvfx_screen_surface_takedown(struct pipe_screen *pscreen)
{
	nv04_2d_context_takedown(nvfx_screen(pscreen)->eng2d);
	nvfx_screen(pscreen)->eng2d = 0;
}

int
nvfx_screen_surface_init(struct pipe_screen *pscreen)
{
	struct nv04_2d_context* ctx = nv04_2d_context_init(nouveau_screen(pscreen)->channel);
	if(!ctx)
		return -1;
	nvfx_screen(pscreen)->eng2d = ctx;
	return 0;
}

static void
nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp)
{
	struct nvfx_surface* ns = (struct nvfx_surface*)surf;
	struct pipe_subresource tempsr, surfsr;
	struct nvfx_context* nvfx = nvfx_context(pipe);
	struct nvfx_miptree* temp;
	unsigned use_vertex_buffers;
	boolean use_index_buffer;
	unsigned base_vertex;

	/* temporarily detach the temp, so it isn't used in place of the actual resource */
	temp = ns->temp;
	ns->temp = 0;

	// TODO: we really should do this validation before setting these variable in draw calls
	use_vertex_buffers = nvfx->use_vertex_buffers;
	use_index_buffer = nvfx->use_index_buffer;
	base_vertex = nvfx->base_vertex;

	tempsr.face = 0;
	tempsr.level = 0;
	surfsr.face = surf->face;
	surfsr.level = surf->level;

	if(to_temp)
		nvfx_resource_copy_region(pipe, &temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
	else
		nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);

	/* If this triggers, it probably means we attempted to use the blitter
	 * but failed due to non-renderability of the target.
	 * Obviously, this would lead to infinite recursion if supported. */
	assert(!ns->temp);

	ns->temp = temp;

	nvfx->use_vertex_buffers = use_vertex_buffers;
	nvfx->use_index_buffer = use_index_buffer;
        nvfx->base_vertex = base_vertex;

	nvfx->dirty |= NVFX_NEW_ARRAYS;
	nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}

void
nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf)
{
	struct nvfx_surface* ns = (struct nvfx_surface*)surf;
	struct pipe_resource template;
	memset(&template, 0, sizeof(struct pipe_resource));
	template.target = PIPE_TEXTURE_2D;
	template.format = surf->format;
	template.width0 = surf->width;
	template.height0 = surf->height;
	template.depth0 = 1;
	template.nr_samples = surf->texture->nr_samples;
	template.flags = NVFX_RESOURCE_FLAG_LINEAR;

	assert(!ns->temp && !util_dirty_surface_is_dirty(&ns->base));

	ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template);
	nvfx_surface_copy_temp(pipe, surf, 1);
}

void
nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf)
{
	struct nvfx_context* nvfx = (struct nvfx_context*)pipe;
	struct nvfx_surface* ns = (struct nvfx_surface*)surf;
	boolean bound = FALSE;

	nvfx_surface_copy_temp(pipe, surf, 0);

	util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base);

	if(nvfx->framebuffer.zsbuf == surf)
		bound = TRUE;
	else
	{
		for(unsigned i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
		{
			if(nvfx->framebuffer.cbufs[i] == surf)
			{
				bound = TRUE;
				break;
			}
		}
	}

	if(!bound)
		pipe_resource_reference((struct pipe_resource**)&ns->temp, 0);
}

static void
nvfx_clear_render_target(struct pipe_context *pipe,
			 struct pipe_surface *dst,
			 const float *rgba,
			 unsigned dstx, unsigned dsty,
			 unsigned width, unsigned height)
{
	union util_color uc;
	util_pack_color(rgba, dst->format, &uc);

	if(util_format_get_blocksizebits(dst->format) > 32
		|| nvfx_surface_fill(pipe, dst, dstx, dsty, width, height, uc.ui))
	{
		// TODO: probably should use hardware clear here instead if possible
		struct blitter_context* blitter = nvfx_get_blitter(pipe, 0);
		util_blitter_clear_render_target(blitter, dst, rgba, dstx, dsty, width, height);
		nvfx_put_blitter(pipe, blitter);
	}
}

static void
nvfx_clear_depth_stencil(struct pipe_context *pipe,
			 struct pipe_surface *dst,
			 unsigned clear_flags,
			 double depth,
			 unsigned stencil,
			 unsigned dstx, unsigned dsty,
			 unsigned width, unsigned height)
{
	if(util_format_get_blocksizebits(dst->format) > 32
		|| nvfx_surface_fill(pipe, dst, dstx, dsty, width, height, util_pack_z_stencil(dst->format, depth, stencil)))
	{
		// TODO: probably should use hardware clear here instead if possible
		struct blitter_context* blitter = nvfx_get_blitter(pipe, 0);
		util_blitter_clear_depth_stencil(blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height);
		nvfx_put_blitter(pipe, blitter);
	}
}


void
nvfx_init_surface_functions(struct nvfx_context *nvfx)
{
	nvfx->pipe.resource_copy_region = nvfx_resource_copy_region;
	nvfx->pipe.clear_render_target = nvfx_clear_render_target;
	nvfx->pipe.clear_depth_stencil = nvfx_clear_depth_stencil;
}