Code Search for Developers
 
 
  

GSRendererSoft.cpp from guliverkli at Krugle


Show GSRendererSoft.cpp syntax highlighted

/* 
 *	Copyright (C) 2003-2005 Gabest
 *	http://www.gabest.org
 *
 *  This Program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *   
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *   
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *  http://www.gnu.org/copyleft/gpl.html
 *
 */

#include "StdAfx.h"
#include "GSRendererSoft.h"
#include "x86.h"

// Constructs the software renderer: resets state, builds the colour
// clamp/mask lookup tables, allocates the 16-byte aligned texture-coordinate
// wrap state (m_uv) and fills the DrawVertex / DrawVertexTFX dispatch tables
// with one template instantiation per parameter combination.
//
// hWnd and hr are forwarded unchanged to the GSRenderer<Vertex> base.
template <class Vertex>
GSRendererSoft<Vertex>::GSRendererSoft(HWND hWnd, HRESULT& hr)
	: GSRenderer<Vertex>(640, 512, hWnd, hr)
{
	Reset();

	// m_clip / m_mask are read through m_clamp, which FlushPrim points 32768
	// entries into the table and which is then indexed with a signed 16-bit
	// channel value. Every slot for SHRT_MIN..SHRT_MAX therefore has to be
	// written: the table index must be wider than a BYTE (a BYTE index wraps
	// at 256 and would only ever touch the first 256 slots) and the last loop
	// has to include SHRT_MAX itself.
	// NOTE(review): assumes both tables hold 65536 entries - confirm in GSRendererSoft.h.
	int i = SHRT_MIN;
	int j = 0;
	// negative inputs: clip -> 0, mask -> low 8 bits
	for(; i < 0; i++, j++) m_clip[j] = 0, m_mask[j] = (BYTE)j;
	// 0..255: both tables pass the value through
	for(; i < 256; i++, j++) m_clip[j] = (BYTE)i, m_mask[j] = (BYTE)j;
	// above 255: clip -> 255, mask -> low 8 bits
	for(; i <= SHRT_MAX; i++, j++) m_clip[j] = 255, m_mask[j] = (BYTE)j;

	m_uv = (uv_wrap_t*)_aligned_malloc(sizeof(uv_wrap_t), 16);

	// w00t :P

	// m_dv[iZTST][iATST]: 4 x 8 instantiations of DrawVertex.
	#define InitATST(iZTST, iATST) \
		m_dv[iZTST][iATST] = &GSRendererSoft<Vertex>::DrawVertex<iZTST, iATST>; \

	#define InitZTST(iZTST) \
		InitATST(iZTST, 0) \
		InitATST(iZTST, 1) \
		InitATST(iZTST, 2) \
		InitATST(iZTST, 3) \
		InitATST(iZTST, 4) \
		InitATST(iZTST, 5) \
		InitATST(iZTST, 6) \
		InitATST(iZTST, 7) \

	#define InitDV() \
		InitZTST(0) \
		InitZTST(1) \
		InitZTST(2) \
		InitZTST(3) \

	InitDV();

	// m_dvtfx[iLOD][bLCM][bTCC][iTFX]: 4 x 2 x 2 x 4 instantiations of DrawVertexTFX.
	#define InitTFX(iLOD, bLCM, bTCC, iTFX) \
		m_dvtfx[iLOD][bLCM][bTCC][iTFX] = &GSRendererSoft<Vertex>::DrawVertexTFX<iLOD, bLCM, bTCC, iTFX>; \

	#define InitTCC(iLOD, bLCM, bTCC) \
		InitTFX(iLOD, bLCM, bTCC, 0) \
		InitTFX(iLOD, bLCM, bTCC, 1) \
		InitTFX(iLOD, bLCM, bTCC, 2) \
		InitTFX(iLOD, bLCM, bTCC, 3) \

	#define InitLCM(iLOD, bLCM) \
		InitTCC(iLOD, bLCM, false) \
		InitTCC(iLOD, bLCM, true) \

	#define InitLOD(iLOD) \
		InitLCM(iLOD, false) \
		InitLCM(iLOD, true) \

	#define InitDVTFX() \
		InitLOD(0) \
		InitLOD(1) \
		InitLOD(2) \
		InitLOD(3) \

	InitDVTFX();
}

// Frees the aligned wrap-state block that the constructor obtained from
// _aligned_malloc.
template <class Vertex>
GSRendererSoft<Vertex>::~GSRendererSoft()
{
	_aligned_free(this->m_uv);
}

// Drops both cached display textures and then lets the base class reset the
// device; the base class result is returned unchanged.
template <class Vertex>
HRESULT GSRendererSoft<Vertex>::ResetDevice(bool fForceWindowed)
{
	for(int i = 0; i < 2; i++)
	{
		m_pRT[i] = NULL;
	}

	HRESULT hrBase = __super::ResetDevice(fForceWindowed);
	return hrBase;
}

// Clears the renderer-local state (no pending primitive type, no texture
// pointer) before delegating to the base class Reset().
template <class Vertex>
void GSRendererSoft<Vertex>::Reset()
{
	m_pTexture = NULL;
	m_primtype = PRIM_NONE;

	__super::Reset();
}

// Moves the vertices of the next primitive (chosen by m_PRIM) from the vertex
// list m_vl into the batch buffer at m_pVertices[m_nVertices] and records the
// matching m_primtype.
//
// Returns the number of vertices written, or 0 when the primitive must not be
// queued (unknown m_PRIM, fSkip set, or neither display circuit enabled).
template <class Vertex>
int GSRendererSoft<Vertex>::DrawingKick(bool fSkip)
{
	Vertex* out = &m_pVertices[m_nVertices];
	int count = 0;

	switch(m_PRIM)
	{
	case 0: // point
		m_primtype = PRIM_POINT;
		m_vl.RemoveAt(0, out[0]);
		count = 1;
		LOGV((out[0], _T("PointList")));
		break;

	case 1: // line
		m_primtype = PRIM_LINE;
		m_vl.RemoveAt(0, out[0]);
		m_vl.RemoveAt(0, out[1]);
		count = 2;
		LOGV((out[0], _T("LineList")));
		LOGV((out[1], _T("LineList")));
		break;

	case 2: // line strip
		m_primtype = PRIM_LINE;
		m_vl.RemoveAt(0, out[0]);
		m_vl.GetAt(0, out[1]);
		count = 2;
		LOGV((out[0], _T("LineStrip")));
		LOGV((out[1], _T("LineStrip")));
		break;

	case 3: // triangle list
		m_primtype = PRIM_TRIANGLE;
		m_vl.RemoveAt(0, out[0]);
		m_vl.RemoveAt(0, out[1]);
		m_vl.RemoveAt(0, out[2]);
		count = 3;
		LOGV((out[0], _T("TriList")));
		LOGV((out[1], _T("TriList")));
		LOGV((out[2], _T("TriList")));
		break;

	case 4: // triangle strip
		m_primtype = PRIM_TRIANGLE;
		m_vl.RemoveAt(0, out[0]);
		m_vl.GetAt(0, out[1]);
		m_vl.GetAt(1, out[2]);
		count = 3;
		LOGV((out[0], _T("TriStrip")));
		LOGV((out[1], _T("TriStrip")));
		LOGV((out[2], _T("TriStrip")));
		break;

	case 5: // triangle fan
		m_primtype = PRIM_TRIANGLE;
		m_vl.GetAt(0, out[0]);
		m_vl.RemoveAt(1, out[1]);
		m_vl.GetAt(1, out[2]);
		count = 3;
		LOGV((out[0], _T("TriFan")));
		LOGV((out[1], _T("TriFan")));
		LOGV((out[2], _T("TriFan")));
		break;

	case 6: // sprite
		m_primtype = PRIM_SPRITE;
		m_vl.RemoveAt(0, out[0]);
		m_vl.RemoveAt(0, out[1]);
		count = 4;

		// Both input corners share the second vertex's z/q.
		out[0].p.z = out[1].p.z;
		out[0].p.q = out[1].p.q;

		// Expand the two corners into four: [2] and [3] start as copies of
		// the second corner, then [1] takes the first corner's y and [2]
		// takes the first corner's x (position and texture coordinate alike).
		out[2] = out[1];
		out[3] = out[1];
		out[1].p.y = out[0].p.y;
		out[1].t.y = out[0].t.y;
		out[2].p.x = out[0].p.x;
		out[2].t.x = out[0].t.x;

		LOGV((out[0], _T("Sprite")));
		LOGV((out[1], _T("Sprite")));
		LOGV((out[2], _T("Sprite")));
		LOGV((out[3], _T("Sprite")));
		/*
		m_primtype = PRIM_TRIANGLE;
		nVertices += 2;
		pVertices[5] = pVertices[3];
		pVertices[3] = pVertices[1];
		pVertices[4] = pVertices[2];
		*/
		break;

	default:
		ASSERT(0);
		return 0;
	}

	if(fSkip)
	{
		return 0;
	}

	if(!m_rs.IsEnabled(0) && !m_rs.IsEnabled(1))
	{
		return 0;
	}

	if(!m_pPRIM->IIP)
	{
		// IIP clear: every vertex takes the colour of the last one.
		typename Vertex::Vector flat = out[count - 1].c;

		for(int k = 0; k < count - 1; k++)
		{
			out[k].c = flat;
		}
	}

	return count;
}

// Rasterizes every primitive queued in m_pVertices: selects the DrawVertex /
// DrawVertexTFX instantiations for the current register state, prepares the
// texture wrap state, scissor rectangle and colour clamp table, then draws
// the batch according to m_primtype. Finally resets m_primtype and calls the
// base class FlushPrim().
template <class Vertex>
void GSRendererSoft<Vertex>::FlushPrim()
{
	if(m_nVertices > 0)
	{
		// Debug aid (disabled by the leading if(0)): dump the frame buffer and
		// the current texture to bitmap files before drawing.
		CString fn;
		static int s_savenum = 0;
		s_savenum++;

		if(0)
		//if(m_ctxt->FRAME.Block() == 0x008c0 && (DWORD)m_ctxt->TEX0.TBP0 == 0x03a98)
		//if(m_ctxt->TEX0.PSM == 0x1b)
		if(m_perfmon.GetFrame() >= 200)
		{
			fn.Format(_T("g:/tmp/%04I64d_%06d_1f_%05x_%x.bmp"), m_perfmon.GetFrame(), s_savenum, m_ctxt->FRAME.Block(), m_ctxt->FRAME.PSM);
			m_lm.SaveBMP(m_pD3DDev, fn, m_ctxt->FRAME.Block(), m_ctxt->FRAME.FBW, m_ctxt->FRAME.PSM, m_ctxt->FRAME.FBW*64, 224);

			if(m_pPRIM->TME)
			{
				fn.Format(_T("g:/tmp/%04I64d_%06d_2t_%05x_%x.bmp"), m_perfmon.GetFrame(), s_savenum, (DWORD)m_ctxt->TEX0.TBP0, (DWORD)m_ctxt->TEX0.PSM);
				m_lm.SaveBMP(m_pD3DDev, fn, m_ctxt->TEX0.TBP0, m_ctxt->TEX0.TBW, m_ctxt->TEX0.PSM, 1 << m_ctxt->TEX0.TW, 1 << m_ctxt->TEX0.TH);
			}
		}

		// A disabled depth / alpha test is dispatched as mode 1.
		int ztst = 1;
		if(m_ctxt->TEST.ZTE)
		{
			ztst = m_ctxt->TEST.ZTST;
		}

		int atst = 1;
		if(m_ctxt->TEST.ATE)
		{
			atst = m_ctxt->TEST.ATST;
		}

		m_pDrawVertex = m_dv[ztst][atst];

		if(m_pPRIM->TME)
		{
			int lod;
			int lcm;

			if(m_pPRIM->FST)
			{
				// With FST set, slot 3 is used and the "LCM" index carries
				// the filter decision derived from K, MMAG and MMIN.
				lod = 3;
				lcm = (m_ctxt->TEX1.K <= 0 && (m_ctxt->TEX1.MMAG & 1)) || (m_ctxt->TEX1.K > 0 && (m_ctxt->TEX1.MMIN & 1));
			}
			else
			{
				lod = (m_ctxt->TEX1.MMAG & 1) + (m_ctxt->TEX1.MMIN & 1);
				lcm = m_ctxt->TEX1.LCM ? 1 : 0;
			}

			int tcc = m_ctxt->TEX0.TCC ? 1 : 0;
			int tfx = m_ctxt->TEX0.TFX;

			m_pDrawVertexTFX = m_dvtfx[lod][lcm][tcc][tfx];
		}

		SetupTexture();

		// Scissor rectangle (right/bottom exclusive), limited to the frame
		// buffer width and to 4096 rows.
		m_scissor.SetRect(
			max(m_ctxt->SCISSOR.SCAX0, 0),
			max(m_ctxt->SCISSOR.SCAY0, 0),
			min(m_ctxt->SCISSOR.SCAX1+1, m_ctxt->FRAME.FBW * 64),
			min(m_ctxt->SCISSOR.SCAY1+1, 4096));

		// Offset by 32768 so the table can be indexed with a signed short.
		m_clamp = (m_de.COLCLAMP.CLAMP ? m_clip : m_mask) + 32768;

		int nPrims = 0;
		Vertex* cur = m_pVertices;

		switch(m_primtype)
		{
		case PRIM_POINT:
			nPrims = m_nVertices;
			LOG(_T("FlushPrim(pt=%d, nVertices=%d, nPrims=%d)\n"), m_primtype, m_nVertices, nPrims);
			for(int n = 0; n < nPrims; n++, cur += 1)
			{
				DrawPoint(cur);
			}
			break;

		case PRIM_LINE:
			ASSERT(!(m_nVertices&1));
			nPrims = m_nVertices / 2;
			LOG(_T("FlushPrim(pt=%d, nVertices=%d, nPrims=%d)\n"), m_primtype, m_nVertices, nPrims);
			for(int n = 0; n < nPrims; n++, cur += 2)
			{
				DrawLine(cur);
			}
			break;

		case PRIM_TRIANGLE:
			ASSERT(!(m_nVertices%3));
			nPrims = m_nVertices / 3;
			LOG(_T("FlushPrim(pt=%d, nVertices=%d, nPrims=%d)\n"), m_primtype, m_nVertices, nPrims);
			for(int n = 0; n < nPrims; n++, cur += 3)
			{
				DrawTriangle(cur);
			}
			break;

		case PRIM_SPRITE:
			ASSERT(!(m_nVertices&3));
			nPrims = m_nVertices / 4;
			LOG(_T("FlushPrim(pt=%d, nVertices=%d, nPrims=%d)\n"), m_primtype, m_nVertices, nPrims);
			for(int n = 0; n < nPrims; n++, cur += 4)
			{
				DrawSprite(cur);
			}
			break;

		default:
			ASSERT(m_nVertices == 0);
			return;
		}

		m_perfmon.IncCounter(GSPerfMon::c_prim, nPrims);

		// Debug aid (disabled): dump the frame buffer again after drawing.
		if(0)
		//if(m_ctxt->FRAME.Block() == 0x008c0 && (DWORD)m_ctxt->TEX0.TBP0 == 0x03a98)
		//if(m_ctxt->TEX0.PSM == 0x1b)
		if(m_perfmon.GetFrame() >= 200)
		{
			fn.Format(_T("g:/tmp/%04I64d_%06d_3f_%05x_%x.bmp"), m_perfmon.GetFrame(), s_savenum, m_ctxt->FRAME.Block(), m_ctxt->FRAME.PSM);
			m_lm.SaveBMP(m_pD3DDev, fn, m_ctxt->FRAME.Block(), m_ctxt->FRAME.FBW, m_ctxt->FRAME.PSM, m_ctxt->FRAME.FBW*64, 224);
		}
	}

	m_primtype = PRIM_NONE;

	__super::FlushPrim();
}

// Copies the contents of each enabled display circuit from local memory into
// a Direct3D texture (created or re-created on demand when the display size
// changes) and hands the resulting FlipInfo array to FinishFlip().
//
// A circuit is skipped for this frame when its texture cannot be created,
// cannot be obtained with 16-byte aligned bits/pitch within the retry limit,
// or cannot be locked.
template <class Vertex>
void GSRendererSoft<Vertex>::Flip()
{
	HRESULT hr;

	FlipInfo rt[2];

	for(int i = 0; i < countof(rt); i++)
	{
		if(m_rs.IsEnabled(i))
		{
			CRect rect = CRect(CPoint(0, 0), m_rs.GetDispRect(i).BottomRight());

			//GSLocalMemory::RoundUp(, GSLocalMemory::GetBlockSize(m_rs.DISPFB[i].PSM));

			ZeroMemory(&rt[i].rd, sizeof(rt[i].rd));
			if(m_pRT[i]) m_pRT[i]->GetLevelDesc(0, &rt[i].rd);

			// Drop the cached texture when the display size has changed.
			if(rt[i].rd.Width != (UINT)rect.right || rt[i].rd.Height != (UINT)rect.bottom)
				m_pRT[i] = NULL;

			if(!m_pRT[i])
			{
				CComPtr<IDirect3DTexture9> pRT;
				D3DLOCKED_RECT lr;
				int nTries = 0, nMaxTries = 10;
				do
				{
					pRT = NULL;
					// Keep lr well defined for the alignment check below even
					// when LockRect fails.
					ZeroMemory(&lr, sizeof(lr));
					hr = m_pD3DDev->CreateTexture(rect.right, rect.bottom, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &pRT, NULL);
					if(FAILED(hr)) break;
					if(SUCCEEDED(pRT->LockRect(0, &lr, NULL, 0)))
						pRT->UnlockRect(0);
					m_pRT[i] = pRT;
				}
				// Retry until the locked bits and pitch are 16-byte aligned.
				while((((DWORD_PTR)lr.pBits & 0xf) || (lr.Pitch & 0xf)) && ++nTries < nMaxTries);

				// Texture creation failed (m_pRT[i] may still be NULL) or no
				// aligned texture was obtained: skip this circuit instead of
				// dereferencing a NULL pointer below.
				if(FAILED(hr) || !m_pRT[i] || nTries == nMaxTries) continue;

				ZeroMemory(&rt[i].rd, sizeof(rt[i].rd));
				hr = m_pRT[i]->GetLevelDesc(0, &rt[i].rd);
			}

			rt[i].pRT = m_pRT[i];

			rt[i].scale = scale_t(1, 1);

			D3DLOCKED_RECT lr;
			if(FAILED(hr = rt[i].pRT->LockRect(0, &lr, NULL, 0)))
				continue;

			// Describe the display buffer as a texture so ReadTexture can
			// convert it into the locked surface.
			GIFRegTEX0 TEX0;
			TEX0.TBP0 = m_rs.pDISPFB[i]->FBP<<5;
			TEX0.TBW = m_rs.pDISPFB[i]->FBW;
			TEX0.PSM = m_rs.pDISPFB[i]->PSM;

			GIFRegCLAMP CLAMP;
			CLAMP.WMS = CLAMP.WMT = 1;

#ifdef DEBUG_RENDERTARGETS
			// Holding SPACE shows the current draw target instead of the
			// display buffer.
			if(::GetAsyncKeyState(VK_SPACE)&0x80000000)
			{
				TEX0.TBP0 = m_ctxt->FRAME.Block();
				TEX0.TBW = m_ctxt->FRAME.FBW;
				TEX0.PSM = m_ctxt->FRAME.PSM;
			}

			// Pump window messages while right CTRL is held down.
			MSG msg;
			ZeroMemory(&msg, sizeof(msg));
			while(msg.message != WM_QUIT)
			{
				if(PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE))
				{
					TranslateMessage(&msg);
					DispatchMessage(&msg);
				}
				else if(!(::GetAsyncKeyState(VK_RCONTROL)&0x80000000))
				{
					break;
				}
			}

			// Left CTRL delays each flip by half a second.
			if(::GetAsyncKeyState(VK_LCONTROL)&0x80000000)
				Sleep(500);

#endif

			m_lm.ReadTexture(rect, (BYTE*)lr.pBits, lr.Pitch, TEX0, m_de.TEXA, CLAMP);

			rt[i].pRT->UnlockRect(0);
		}
	}

	FinishFlip(rt);
}

// End-of-frame hook; the software renderer has nothing to do here.
template <class Vertex>
void GSRendererSoft<Vertex>::EndFrame()
{
	// intentionally empty
}

// Positions the frame/Z address cursors at pixel (x, y): computes the address
// of column 0 of row y for both buffers, points the row-offset cursors at
// column x, and performs the first RowStep() so that m_faddr / m_zaddr refer
// to (x, y).
template <class Vertex>
void GSRendererSoft<Vertex>::RowInit(int x, int y)
{
	const int row = y & 7;

	// Both buffers are addressed with the frame buffer width.
	m_faddr_x0 = (m_ctxt->ftbl->pa)(0, y, m_ctxt->FRAME.FBP<<5, m_ctxt->FRAME.FBW);
	m_zaddr_x0 = (m_ctxt->ztbl->pa)(0, y, m_ctxt->ZBUF.ZBP<<5, m_ctxt->FRAME.FBW);

	m_faddr_ro = &m_ctxt->ftbl->rowOffset[row][x];
	m_zaddr_ro = &m_ctxt->ztbl->rowOffset[row][x];

	m_fy = y;
	m_fx = x - 1; // the RowStep() below increments it back to x

	RowStep();
}

// Advances one pixel to the right: bumps m_fx, loads the frame and Z
// addresses for the new column from the row-offset cursors and moves both
// cursors on by one entry.
template <class Vertex>
void GSRendererSoft<Vertex>::RowStep()
{
	++m_fx;

	m_faddr = m_faddr_x0 + *m_faddr_ro;
	++m_faddr_ro;

	m_zaddr = m_zaddr_x0 + *m_zaddr_ro;
	++m_zaddr_ro;
}

// Draws a single point primitive.
//
// v points at the one vertex of the point. The pixel is drawn only when its
// position lies inside the scissor rectangle, matching the per-pixel scissor
// test used by DrawLine.
template <class Vertex>
void GSRendererSoft<Vertex>::DrawPoint(Vertex* v)
{
	CPoint p = *v;

	// Pixels outside the scissor rectangle must be rejected, not drawn.
	if(m_scissor.PtInRect(p))
	{
		RowInit(p.x, p.y);
		(this->*m_pDrawVertex)(*v);
	}
}

// Draws a line from v[0] towards v[1] by stepping one pixel at a time along
// the axis with the larger integer extent and interpolating every vertex
// attribute linearly. Each pixel is scissor-tested individually; lines whose
// integer extent is zero on both axes are skipped.
template <class Vertex>
void GSRendererSoft<Vertex>::DrawLine(Vertex* v)
{
	typedef typename Vertex::Vector vec_t;

	Vertex delta = v[1] - v[0];

	// Absolute extent of the line on each axis.
	vec_t extent = delta.p;
	extent.x.abs();
	extent.y.abs();

	int spanX = (int)extent.x;
	int spanY = (int)extent.y;

	if(spanX == 0 && spanY == 0)
	{
		return;
	}

	// Index of the axis to step along: 0 = x, 1 = y.
	int major = spanX > spanY ? 0 : 1;

	Vertex cur = v[0];
	Vertex inc = delta / extent.v[major];

	// TODO: clip with the scissor

	for(int remaining = (int)extent.v[major]; remaining > 0; remaining--)
	{
		CPoint pt = cur;

		if(m_scissor.PtInRect(pt))
		{
			RowInit(pt.x, pt.y);
			(this->*m_pDrawVertex)(cur);
		}

		cur += inc;
	}
}

// Scan-converts one triangle.
//
// v points at three vertices; they are re-ordered in place by ascending y.
// The triangle is drawn in two passes (v0..v1 rows, then v1..v2 rows). Two
// edge walkers are kept: edge[0] supplies the left end (and the interpolated
// attributes) of each scanline, edge[1] only its right x. "ledge" is the
// index of the walker that follows the two short edges (v0->v1, then v1->v2),
// "redge" the index of the one following the long edge v0->v2.
// Rows and columns are clipped against m_scissor.
template <class Vertex>
void GSRendererSoft<Vertex>::DrawTriangle(Vertex* v)
{
	// Sort the vertices so that v[0].y <= v[1].y <= v[2].y.
	if(v[1].p.y < v[0].p.y) {Exchange(&v[0], &v[1]);}
	if(v[2].p.y < v[0].p.y) {Exchange(&v[0], &v[2]);}
	if(v[2].p.y < v[1].p.y) {Exchange(&v[1], &v[2]);}

	// Zero height: nothing to draw.
	if(!(v[0].p.y < v[2].p.y)) return;

	Vertex v01 = v[1] - v[0];
	Vertex v02 = v[2] - v[0];

	// "longest" is the signed horizontal distance from v[1] to the long edge
	// at v[1]'s height; its sign tells on which side the short edges lie.
	Vertex::Scalar temp = v01.p.y / v02.p.y;
	Vertex::Scalar longest = temp * v02.p.x - v01.p.x;

	int ledge, redge;
	// The magnitude is kept at 1 or more because it is used as a divisor for dscan.
	if(Vertex::Scalar(0) < longest) {ledge = 0; redge = 1; if(longest < Vertex::Scalar(1)) longest = Vertex::Scalar(1);}
	else if(longest < Vertex::Scalar(0)) {ledge = 1; redge = 0; if(Vertex::Scalar(-1) < longest) longest = Vertex::Scalar(-1);}
	else return; // degenerate: all three vertices on one line

	Vertex edge[2] = {v[0], v[0]};

	// Per-row increments of both edge walkers.
	Vertex dedge[2];
	dedge[0].p.y = dedge[1].p.y = Vertex::Scalar(1);
	if(Vertex::Scalar(0) < v01.p.y) dedge[ledge] = v01 / v01.p.y;
	if(Vertex::Scalar(0) < v02.p.y) dedge[redge] = v02 / v02.p.y;

	Vertex scan;

	// Per-pixel increment along a scanline (constant over the triangle).
	Vertex dscan = (v02 * temp - v01) / longest;
	dscan.p.y = 0;

	for(int i = 0; i < 2; i++, v++)
	{ 
		// Row range of this pass, clipped to the scissor.
		int top = edge[0].p.y.ceil_i(), bottom = v[1].p.y.ceil_i();
		if(top < m_scissor.top) top = min(m_scissor.top, bottom);
		if(bottom > m_scissor.bottom) bottom = m_scissor.bottom;
		if(edge[0].p.y < Vertex::Scalar(top)) // for(int j = 0; j < 2; j++) edge[j] += dedge[j] * ((float)top - edge[0].p.y);
		{
			// Advance both walkers to the first row that is actually drawn.
			Vertex::Scalar dy = Vertex::Scalar(top) - edge[0].p.y;
			edge[0] += dedge[0] * dy;
			edge[1].p.x += dedge[1].p.x * dy;
			edge[0].p.y = edge[1].p.y = Vertex::Scalar(top);
		}

		ASSERT(top >= bottom || (int)((edge[1].p.y - edge[0].p.y) * 10) == 0);

		for(; top < bottom; top++)
		{
			scan = edge[0];

			// Column range of this row, clipped to the scissor.
			int left = edge[0].p.x.ceil_i(), right = edge[1].p.x.ceil_i();
			if(left < m_scissor.left) left = m_scissor.left;
			if(right > m_scissor.right) right = m_scissor.right;
			if(edge[0].p.x < Vertex::Scalar(left))
			{
				// Pre-step the attributes to the first drawn pixel centre.
				scan += dscan * (Vertex::Scalar(left) - edge[0].p.x);
				scan.p.x = Vertex::Scalar(left);
			}

			RowInit(left, top);

			for(int steps = right - left; steps > 0; steps--)
			{
				(this->*m_pDrawVertex)(scan);
				scan += dscan;
				RowStep();
			}

			// for(int j = 0; j < 2; j++) edge[j] += dedge[j];
			edge[0] += dedge[0];
			edge[1].p += dedge[1].p;
		}

		// After the first pass, restart the short-edge walker at v[1] for the
		// v1->v2 half. This is done on the first pass only: on the second
		// pass v has already been advanced, so v[2] would be a vertex that
		// does not belong to this triangle and the result would be unused.
		if(i == 0 && v[1].p.y < v[2].p.y)
		{
			edge[ledge] = v[1];
			dedge[ledge] = (v[2] - v[1]) / (v[2].p.y - v[1].p.y);
			edge[ledge] += dedge[ledge] * (edge[ledge].p.y.ceil_s() - edge[ledge].p.y);
		}
	}
}

// Draws an axis-aligned rectangle given as four corner vertices (as produced
// for sprites by DrawingKick). The corners are re-ordered in place so that
// v[0] is the top-left one, the area is clipped to m_scissor, and the
// rectangle is either filled in one go by DrawFilledRect() or rendered pixel
// by pixel with attributes interpolated along both axes.
template <class Vertex>
void GSRendererSoft<Vertex>::DrawSprite(Vertex* v)
{
	// Make v[0]/v[1] the upper pair and v[0]/v[2] the left pair.
	if(v[2].p.y < v[0].p.y)
	{
		Exchange(&v[0], &v[2]);
		Exchange(&v[1], &v[3]);
	}

	if(v[1].p.x < v[0].p.x)
	{
		Exchange(&v[0], &v[1]);
		Exchange(&v[2], &v[3]);
	}

	// Zero width or height: nothing to draw (also protects the divisions below).
	if(v[0].p.x == v[1].p.x || v[0].p.y == v[2].p.y)
	{
		return;
	}

	Vertex across = v[1] - v[0];
	Vertex down = v[2] - v[0];

	Vertex dscan = across / across.p.x; // attribute change per pixel in x
	Vertex dedge = down / down.p.y;     // attribute change per row
	Vertex edge = v[0];
	Vertex scan;

	// Vertical range, clipped to the scissor; pre-step to the first row.
	int y0 = v[0].p.y.ceil_i();
	int y1 = v[2].p.y.ceil_i();
	if(y0 < m_scissor.top) y0 = min(m_scissor.top, y1);
	if(y1 > m_scissor.bottom) y1 = m_scissor.bottom;
	if(v[0].p.y < Vertex::Scalar(y0))
	{
		edge += dedge * (Vertex::Scalar(y0) - v[0].p.y);
	}

	// Horizontal range, clipped to the scissor; pre-step to the first column.
	int x0 = v[0].p.x.ceil_i();
	int x1 = v[1].p.x.ceil_i();
	if(x0 < m_scissor.left) x0 = m_scissor.left;
	if(x1 > m_scissor.right) x1 = m_scissor.right;
	if(v[0].p.x < Vertex::Scalar(x0))
	{
		edge += dscan * (Vertex::Scalar(x0) - v[0].p.x);
	}

	// Fast path: a plain solid fill needs no per-pixel work.
	if(DrawFilledRect(x0, y0, x1, y1, edge))
	{
		return;
	}

	for(int y = y0; y < y1; y++)
	{
		scan = edge;

		RowInit(x0, y);

		for(int x = x0; x < x1; x++)
		{
			(this->*m_pDrawVertex)(scan);
			scan += dscan;
			RowStep();
		}

		edge += dedge;
	}
}

// Fast path for solid rectangles: fills [left, right) x [top, bottom) of the
// frame buffer (and of the Z buffer unless ZBUF.ZMSK is set) with the colour
// and depth of v through GSLocalMemory::FillRect.
//
// Returns true when the rectangle was filled here. Returns false - without
// drawing anything - when the rectangle is empty or when any state that needs
// per-pixel processing is active, in which case the caller must rasterize it
// itself.
template <class Vertex>
bool GSRendererSoft<Vertex>::DrawFilledRect(int left, int top, int right, int bottom, const Vertex& v)
{
	if(left >= right || top >= bottom)
	{
		return false;
	}

	ASSERT(top >= 0);
	ASSERT(bottom >= 0);

	// Anything beyond "write one constant colour / depth everywhere" has to
	// go through the per-pixel path.
	const bool fPerPixel =
		m_pPRIM->IIP
		|| (m_ctxt->TEST.ZTE && m_ctxt->TEST.ZTST != 1)
		|| (m_ctxt->TEST.ATE && m_ctxt->TEST.ATST != 1)
		|| m_ctxt->TEST.DATE
		|| m_pPRIM->TME
		|| m_pPRIM->ABE
		|| m_pPRIM->FGE
		|| m_de.DTHE.DTHE
		|| m_ctxt->FRAME.FBMSK;

	if(fPerPixel)
	{
		return false;
	}

	DWORD FBW = m_ctxt->FRAME.FBW;
	DWORD FBP = m_ctxt->FRAME.FBP<<5;
	DWORD ZBP = m_ctxt->ZBUF.ZBP<<5;

	if(!m_ctxt->ZBUF.ZMSK)
	{
		m_lm.FillRect(CRect(left, top, right, bottom), v.GetZ(), m_ctxt->ZBUF.PSM, ZBP, FBW);
	}

	// Unpack the vertex colour into four 16-bit channels.
	__declspec(align(16)) union {struct {short Rf, Gf, Bf, Af;}; UINT64 Cui64;};
	Cui64 = v.c;

	// Clamp or mask RGB (depending on the table selected in FlushPrim) and
	// merge FBA into the top alpha bit.
	Rf = m_clamp[Rf];
	Gf = m_clamp[Gf];
	Bf = m_clamp[Bf];
	Af |= m_ctxt->FBA.FBA << 7;

	const bool f16bit = m_ctxt->FRAME.PSM == PSM_PSMCT16 || m_ctxt->FRAME.PSM == PSM_PSMCT16S;

	DWORD Cdw;

	if(f16bit)
	{
		// Pack to 1:5:5:5 (A, B, G, R from the top bit down).
		Cdw = ((DWORD)(Rf&0xf8) >> 3)
			| ((DWORD)(Gf&0xf8) << 2)
			| ((DWORD)(Bf&0xf8) << 7)
			| ((DWORD)(Af&0x80) << 8);
	}
	else
	{
		// Pack to 8:8:8:8 (A, B, G, R from the top byte down).
#if _M_IX86_FP >= 2 || defined(_M_AMD64)
		__m128i r0 = _mm_load_si128((__m128i*)&Cui64);
		Cdw = (DWORD)_mm_cvtsi128_si32(_mm_packus_epi16(r0, r0));
#else
		Cdw = ((DWORD)(Rf&0xff) << 0)
			| ((DWORD)(Gf&0xff) << 8)
			| ((DWORD)(Bf&0xff) << 16)
			| ((DWORD)(Af&0xff) << 24);
#endif
	}

	m_lm.FillRect(CRect(left, top, right, bottom), Cdw, m_ctxt->FRAME.PSM, FBP, FBW);

	return true;
}

// Processes one pixel at the current row cursor (m_faddr / m_zaddr, m_fx /
// m_fy as set by RowInit / RowStep): depth test, optional texturing and
// fogging, alpha test, depth write, destination alpha test, alpha blending,
// dithering, colour clamping and the final masked frame buffer write.
//
// iZTST and iATST are compile-time test modes; FlushPrim picks the matching
// instantiation from m_dv. v carries the interpolated attributes of the pixel.
template <class Vertex>
template <int iZTST, int iATST>
void GSRendererSoft<Vertex>::DrawVertex(const Vertex& v)
{
	DWORD vz;

	// Depth test: 0 rejects everything, 1 accepts everything, 2 rejects when
	// the new depth is below the stored one, 3 when it is below or equal.
	switch(iZTST)
	{
	case 0: return;
	case 1: break;
	case 2: vz = v.GetZ(); if(vz < (m_lm.*m_ctxt->ztbl->rpa)(m_zaddr)) return; break;
	case 3: vz = v.GetZ(); if(vz <= (m_lm.*m_ctxt->ztbl->rpa)(m_zaddr)) return; break;
	default: __assume(0);
	}

	// Cf / Cd / Ca alias Cfda[0..2] so that the blend stage below can select
	// its operands by index.
	union
	{
		struct {Vertex::Vector Cf, Cd, Ca;};
		struct {Vertex::Vector Cfda[3];};
	};

	Cf = v.c;

	if(m_pPRIM->TME)
	{
		// Combine the vertex colour with the texel (instantiation chosen in FlushPrim).
		(this->*m_pDrawVertexTFX)(Cf, v);
	}

	if(m_pPRIM->FGE)
	{
		// Blend towards the fog colour by the factor in v.t.z; alpha is kept.
		Vertex::Scalar a = Cf.a;
		Vertex::Vector Cfog((DWORD)m_de.FOGCOL.ai32[0]);
		Cf = Cfog + (Cf - Cfog) * v.t.z;
		Cf.a = a;
	}

	// Local copies: a failed alpha test may tighten the write masks below.
	BOOL ZMSK = m_ctxt->ZBUF.ZMSK;
	DWORD FBMSK = m_ctxt->FRAME.FBMSK;

	bool fAlphaPass = true;

	BYTE Af = (BYTE)(int)Cf.a;

	// Alpha test against TEST.AREF: 0 never, 1 always, 2 <, 3 <=, 4 ==, 5 >=, 6 >, 7 !=.
	switch(iATST)
	{
	case 0: fAlphaPass = false; break;
	case 1: fAlphaPass = true; break;
	case 2: fAlphaPass = Af < m_ctxt->TEST.AREF; break;
	case 3: fAlphaPass = Af <= m_ctxt->TEST.AREF; break;
	case 4: fAlphaPass = Af == m_ctxt->TEST.AREF; break;
	case 5: fAlphaPass = Af >= m_ctxt->TEST.AREF; break;
	case 6: fAlphaPass = Af > m_ctxt->TEST.AREF; break;
	case 7: fAlphaPass = Af != m_ctxt->TEST.AREF; break;
	default: __assume(0);
	}

	if(!fAlphaPass)
	{
		// TEST.AFAIL decides what may still be written after a failed test:
		// nothing (0), colour only (1), depth only (2) or RGB only (3).
		switch(m_ctxt->TEST.AFAIL)
		{
		case 0: return;
		case 1: ZMSK = 1; break; // RGBA
		case 2: FBMSK = 0xffffffff; break; // Z
		case 3: FBMSK = 0xff000000; ZMSK = 1; break; // RGB
		default: __assume(0);
		}
	}

	if(!ZMSK)
	{
		// vz was already fetched by depth test modes 2 and 3.
		if(iZTST != 2 && iZTST != 3) vz = v.GetZ(); 
		(m_lm.*m_ctxt->ztbl->wpa)(m_zaddr, vz);
	}

	// Skip the colour path entirely when every frame buffer bit is masked.
	if(FBMSK != ~0)
	{
		if(m_ctxt->TEST.DATE && m_ctxt->FRAME.PSM <= PSM_PSMCT16S && m_ctxt->FRAME.PSM != PSM_PSMCT24)
		{
			// Destination alpha test: compare the stored pixel's top alpha
			// bit (bit 31 for PSMCT32, bit 15 otherwise) with TEST.DATM.
			BYTE A = (BYTE)((m_lm.*m_ctxt->ftbl->rpa)(m_faddr) >> (m_ctxt->FRAME.PSM == PSM_PSMCT32 ? 31 : 15));
			if(A ^ m_ctxt->TEST.DATM) return;
		}

		// FIXME: for AA1 the value of Af should be calculated from the pixel coverage...

		// Blend when ABE is set (or AA1 on PRIM 1 / 2), unless PABE is set
		// and the source alpha is below 0x80.
		bool fABE = (m_pPRIM->ABE || m_pPRIM->AA1 && (m_pPRIM->PRIM == 1 || m_pPRIM->PRIM == 2)) && (!m_de.PABE.PABE || (int)Cf.a >= 0x80);

		// The destination colour is needed for blending and for merging
		// masked bits.
		if(FBMSK || fABE)
		{
			GIFRegTEXA TEXA;
			/*
			TEXA.AEM = 0;
			TEXA.TA0 = 0;
			TEXA.TA1 = 0x80;
			*/
			TEXA.ai32[0] = 0;
			TEXA.ai32[1] = 0x80;
			Cd = (m_lm.*m_ctxt->ftbl->rta)(m_faddr, m_ctxt->TEX0, TEXA);
		}

		if(fABE)
		{
			// Ca is the third selectable operand: zero colour with ALPHA.FIX as alpha.
			Ca = Vertex::Vector(Vertex::Scalar(0));
			Ca.a = Vertex::Scalar((int)m_ctxt->ALPHA.FIX);

			// ((A - B) * C.a >> 7) + D with operands picked by the ALPHA
			// register; the source alpha itself is preserved.
			Vertex::Scalar a = Cf.a;
			Cf = ((Cfda[m_ctxt->ALPHA.A] - Cfda[m_ctxt->ALPHA.B]) * Cfda[m_ctxt->ALPHA.C].a >> 7) + Cfda[m_ctxt->ALPHA.D];
			Cf.a = a;
		}

		DWORD Cdw; 

		if(m_de.COLCLAMP.CLAMP && !m_de.DTHE.DTHE)
		{
			// Clamping without dithering: use the vector's own DWORD conversion.
			Cdw = Cf;
		}
		else
		{
			// NOTE: this Af shadows the BYTE Af declared above.
			__declspec(align(16)) union {struct {short Rf, Gf, Bf, Af;}; UINT64 Cui64;};
			Cui64 = Cf;

			if(m_de.DTHE.DTHE)
			{
				// Fetch the 4-bit entry of DIMX for (m_fx & 3, m_fy & 3),
				// sign-extend its low 3 bits and add it to RGB.
				short DMxy = (signed char)((*((WORD*)&m_de.DIMX.i64 + (m_fy&3)) >> ((m_fx&3)<<2)) << 5) >> 5;
				Rf = (short)(Rf + DMxy);
				Gf = (short)(Gf + DMxy);
				Bf = (short)(Bf + DMxy);
			}

			// m_clamp is the clip or mask table selected in FlushPrim.
			Rf = m_clamp[Rf];
			Gf = m_clamp[Gf];
			Bf = m_clamp[Bf];
			Af |= m_ctxt->FBA.FBA << 7;

			// Pack the four 16-bit channels into one 32-bit pixel.
#if _M_IX86_FP >= 2 || defined(_M_AMD64)
			__m128i r0 = _mm_load_si128((__m128i*)&Cui64);
			Cdw = (DWORD)_mm_cvtsi128_si32(_mm_packus_epi16(r0, r0));
#else
			Cdw = ((DWORD)(Rf&0xff) << 0)
				| ((DWORD)(Gf&0xff) << 8) 
				| ((DWORD)(Bf&0xff) << 16) 
				| ((DWORD)(Af&0xff) << 24);
#endif
		}

		if(FBMSK != 0)
		{
			// Masked bits keep the destination value.
			Cdw = (Cdw & ~FBMSK) | ((DWORD)Cd & FBMSK);
		}

		(m_lm.*m_ctxt->ftbl->wfa)(m_faddr, Cdw);
	}
}

// 1 / ln(2): multiplying a natural logarithm by this yields a base-2 logarithm.
static const float one_over_log2 = 1.0f / log(2.0f);

// Samples the current texture at the coordinates in v.t (point or bilinear),
// applies the wrap/clamp state prepared by SetupTexture() and combines the
// texel with the colour Cf according to iTFX. Cf is updated in place and
// saturated.
//
// Template parameters (selected in FlushPrim through m_dvtfx):
//   iLOD  0: point sampling, 1: filter chosen per pixel from a computed lod,
//         2: bilinear, 3: no divide by q, bilinear iff bLCM.
//   bLCM  for iLOD == 1: use TEX1.K alone as lod; for iLOD == 3: bilinear flag.
//   bTCC  whether the texel supplies alpha.
//   iTFX  texture function, see the switch at the end.
template <class Vertex>
template <int iLOD, bool bLCM, bool bTCC, int iTFX>
void GSRendererSoft<Vertex>::DrawVertexTFX(typename Vertex::Vector& Cf, const Vertex& v)
{
	ASSERT(m_pPRIM->TME);
	
	Vertex::Vector t = v.t;

	bool fBiLinear = iLOD == 2; 

	if(iLOD == 3)
	{
		// FlushPrim selects iLOD == 3 when m_pPRIM->FST is set: the
		// coordinates are used as they are.
		fBiLinear = bLCM;
	}
	else
	{
		// Perspective divide: scale the coordinates by 1 / q.
		t.q.rcp();
		t *= t.q;

		if(iLOD == 1)
		{
			// lod = K (+ log2|t.q| * 2^L unless bLCM); lod <= 0 consults
			// MMAG, lod > 0 consults MMIN.
			float lod = (float)(int)m_ctxt->TEX1.K;
			if(!bLCM) lod += log(fabs((float)t.q)) * one_over_log2 * (1 << m_ctxt->TEX1.L);
			fBiLinear = lod <= 0 && (m_ctxt->TEX1.MMAG & 1) || lod > 0 && (m_ctxt->TEX1.MMIN & 1);
		}
	}

	// Shift by half a texel so that the four neighbours straddle the sample point.
	if(fBiLinear) t -= Vertex::Scalar(0.5f);

	// Integer texel coordinates: {u, u+1, v, v+1}; the remaining four
	// elements are zero-initialized.
	__declspec(align(16)) short ituv[8] = 
	{
		(short)(int)t.x, 
		(short)(int)t.x+1, 
		(short)(int)t.y, 
		(short)(int)t.y+1
	};

	// Apply the wrap state per coordinate: where mask is set take
	// (coord & and) | or, elsewhere clamp coord to [min, max].
#if _M_IX86_FP >= 2 || defined(_M_AMD64)

	__m128i uv = _mm_load_si128((__m128i*)ituv);
	__m128i mask = _mm_load_si128((__m128i*)m_uv->mask);
	__m128i region = _mm_or_si128(_mm_and_si128(uv, *(__m128i*)m_uv->and), *(__m128i*)m_uv->or);
	__m128i clamp = _mm_min_epi16(_mm_max_epi16(uv, *(__m128i*)m_uv->min), *(__m128i*)m_uv->max);
	_mm_store_si128((__m128i*)ituv, _mm_or_si128(_mm_and_si128(region, mask), _mm_andnot_si128(mask, clamp)));

#else

	for(int i = 0; i < 4; i++)
	{
		short region = (ituv[i] & m_uv->and[i]) | m_uv->or[i];
		short clamp = ituv[i] < m_uv->min[i] ? m_uv->min[i] : ituv[i] > m_uv->max[i] ? m_uv->max[i] : ituv[i];
		ituv[i] = (region & m_uv->mask[i]) | (clamp & ~m_uv->mask[i]);
	}

#endif

	Vertex::Vector Ct[4];

	if(fBiLinear)
	{
		// The m_pTexture path is disabled by the constant 0.
		if(0 && m_pTexture)
		{
			Ct[0] = m_pTexture[(ituv[2] << m_ctxt->TEX0.TW) + ituv[0]];
			Ct[1] = m_pTexture[(ituv[2] << m_ctxt->TEX0.TW) + ituv[1]];
			Ct[2] = m_pTexture[(ituv[3] << m_ctxt->TEX0.TW) + ituv[0]];
			Ct[3] = m_pTexture[(ituv[3] << m_ctxt->TEX0.TW) + ituv[1]];
		}
		else
		{
			// Fetch the 2x2 neighbourhood from local memory.
			Ct[0] = (m_lm.*m_ctxt->ttbl->rt)(ituv[0], ituv[2], m_ctxt->TEX0, m_de.TEXA);
			Ct[1] = (m_lm.*m_ctxt->ttbl->rt)(ituv[1], ituv[2], m_ctxt->TEX0, m_de.TEXA);
			Ct[2] = (m_lm.*m_ctxt->ttbl->rt)(ituv[0], ituv[3], m_ctxt->TEX0, m_de.TEXA);
			Ct[3] = (m_lm.*m_ctxt->ttbl->rt)(ituv[1], ituv[3], m_ctxt->TEX0, m_de.TEXA);
		}

		// Fractional position inside the texel cell.
		Vertex::Vector ft = t - t.floor();

		// Interpolate horizontally on both rows, then vertically; the result
		// ends up in Ct[0].
		Ct[0] = Ct[0] + (Ct[1] - Ct[0]) * ft.x;
		Ct[2] = Ct[2] + (Ct[3] - Ct[2]) * ft.x;
		Ct[0] = Ct[0] + (Ct[2] - Ct[0]) * ft.y;
	}
	else 
	{
		// The m_pTexture path is disabled by the constant 0.
		if(0 && m_pTexture)
		{
			Ct[0] = m_pTexture[(ituv[2] << m_ctxt->TEX0.TW) + ituv[0]];
		}
		else
		{
			Ct[0] = (m_lm.*m_ctxt->ttbl->rt)(ituv[0], ituv[2], m_ctxt->TEX0, m_de.TEXA);
		}
	}

	// Incoming alpha, needed where the texture function must not replace it.
	Vertex::Scalar a = Cf.a;

	switch(iTFX)
	{
	case 0:
		// Cf * Ct >> 7; alpha restored unless bTCC.
		Cf = (Cf * Ct[0] >> 7);
		if(!bTCC) Cf.a = a;
		break;
	case 1:
		// Texel replaces the colour.
		Cf = Ct[0];
		break;
	case 2:
		// (Cf * Ct >> 7) + incoming alpha; alpha is a, or Ct.a + a with bTCC.
		Cf = (Cf * Ct[0] >> 7) + Cf.a;
		Cf.a = !bTCC ? a : (Ct[0].a + a);
		break;
	case 3:
		// As case 2, but with bTCC the alpha is taken from the texel alone.
		Cf = (Cf * Ct[0] >> 7) + Cf.a;
		Cf.a = !bTCC ? a : Ct[0].a;
		break;
	default: 
		__assume(0);
	}

	Cf.sat();
}

// Prepares texturing for the batch about to be drawn: loads the CLUT and
// converts the CLAMP register into the per-coordinate wrap tables in m_uv
// that DrawVertexTFX applies to {u, u+1, v, v+1}. Does nothing when
// texturing (PRIM.TME) is off.
//
// For each axis, mask == 0xffff selects "(coord & and) | or" and mask == 0
// selects "clamp coord to [min, max]".
template <class Vertex>
void GSRendererSoft<Vertex>::SetupTexture()
{
	if(!m_pPRIM->TME)
	{
		return;
	}

	m_lm.SetupCLUT32(m_ctxt->TEX0, m_de.TEXA);

	//

	uv_wrap_t& uv = *m_uv;

	int tw = 1 << m_ctxt->TEX0.TW;
	int th = 1 << m_ctxt->TEX0.TH;

	// Horizontal axis -> slot 0.
	switch(m_ctxt->CLAMP.WMS)
	{
	case 0: // wrap at the texture width
		uv.and[0] = (short)(tw-1);
		uv.or[0] = 0;
		uv.mask[0] = 0xffff;
		break;
	case 1: // clamp to the texture width
		uv.min[0] = 0;
		uv.max[0] = (short)(tw-1);
		uv.mask[0] = 0;
		break;
	case 2: // clamp to [MINU, MAXU]
		uv.min[0] = (short)m_ctxt->CLAMP.MINU;
		uv.max[0] = (short)m_ctxt->CLAMP.MAXU;
		uv.mask[0] = 0;
		break;
	case 3: // (coord & MINU) | MAXU
		uv.and[0] = (short)m_ctxt->CLAMP.MINU;
		uv.or[0] = (short)m_ctxt->CLAMP.MAXU;
		uv.mask[0] = 0xffff;
		break;
	default:
		__assume(0);
	}

	// Slot 1 (u+1) uses the same settings as slot 0.
	uv.and[1] = uv.and[0];
	uv.or[1] = uv.or[0];
	uv.min[1] = uv.min[0];
	uv.max[1] = uv.max[0];
	uv.mask[1] = uv.mask[0];

	// Vertical axis -> slot 2.
	switch(m_ctxt->CLAMP.WMT)
	{
	case 0: // wrap at the texture height
		uv.and[2] = (short)(th-1);
		uv.or[2] = 0;
		uv.mask[2] = 0xffff;
		break;
	case 1: // clamp to the texture height
		uv.min[2] = 0;
		uv.max[2] = (short)(th-1);
		uv.mask[2] = 0;
		break;
	case 2: // clamp to [MINV, MAXV]
		uv.min[2] = (short)m_ctxt->CLAMP.MINV;
		uv.max[2] = (short)m_ctxt->CLAMP.MAXV;
		uv.mask[2] = 0;
		break;
	case 3: // (coord & MINV) | MAXV
		uv.and[2] = (short)m_ctxt->CLAMP.MINV;
		uv.or[2] = (short)m_ctxt->CLAMP.MAXV;
		uv.mask[2] = 0xffff;
		break;
	default:
		__assume(0);
	}

	// Slot 3 (v+1) uses the same settings as slot 2.
	uv.and[3] = uv.and[2];
	uv.or[3] = uv.or[2];
	uv.min[3] = uv.min[2];
	uv.max[3] = uv.max[2];
	uv.mask[3] = uv.mask[2];
}

//
// GSRendererSoftFP
//

// Software renderer instantiated for GSSoftVertexFP vertices; all set-up is
// done by the GSRendererSoft base constructor.
GSRendererSoftFP::GSRendererSoftFP(HWND hWnd, HRESULT& hr)
	: GSRendererSoft<GSSoftVertexFP>(hWnd, hr)
{
	// nothing beyond the base class initialization
}

void GSRendererSoftFP::VertexKick(bool fSkip)
{
	GSSoftVertexFP& v = m_vl.AddTail();

	v.c = (DWORD)m_v.RGBAQ.ai32[0];

	v.p.x = (int)m_v.XYZ.X - (int)m_ctxt->XYOFFSET.OFX;
	v.p.y = (int)m_v.XYZ.Y - (int)m_ctxt->XYOFFSET.OFY;
	v.p *= GSSoftVertexFP::Scalar(1.0f/16);
	v.p.z = (float)(m_v.XYZ.Z >> 16);
	v.p.q = (float)(m_v.XYZ.Z & 0xffff);

	if(m_pPRIM->TME)
	{
		if(m_pPRIM->FST)
		{
			v.t.x = (float)(int)m_v.UV.U;
			v.t.y = (float)(int)m_v.UV.V;
			v.t *= GSSoftVertexFP::Scalar(1.0f/16);
			v.t.q = 1.0f;
		}
		else
		{
			v.t.x = m_v.ST.S * (1 << m_ctxt->TEX0.TW);
			v.t.y = m_v.ST.T * (1 << m_ctxt->TEX0.TH);
			v.t.q = m_v.RGBAQ.Q;
		}
	}

	if(m_pPRIM->FGE)
	{
		v.t.z = (float)m_v.FOG.F * (1.0f/255);
	}

	__super::VertexKick(fSkip);
}
/*
//
// GSRendererSoftFX
//

GSRendererSoftFX::GSRendererSoftFX(HWND hWnd, HRESULT& hr)
	: GSRendererSoft<GSSoftVertexFX>(hWnd, hr)
{
}

void GSRendererSoftFX::VertexKick(bool fSkip)
{
	GSSoftVertexFX& v = m_vl.AddTail();

	v.c = (DWORD)m_v.RGBAQ.ai32[0];

	v.p.x = ((int)m_v.XYZ.X - (int)m_ctxt->XYOFFSET.OFX) << 12;
	v.p.y = ((int)m_v.XYZ.Y - (int)m_ctxt->XYOFFSET.OFY) << 12;
	v.p.z = (int)((m_v.XYZ.Z & 0xffff0000) >> 1);
	v.p.q = (int)((m_v.XYZ.Z & 0x0000ffff) << 15);

	if(m_pPRIM->TME)
	{
		if(m_pPRIM->FST)
		{
			v.t.x = ((int)m_v.UV.U << (12 >> m_ctxt->TEX0.TW));
			v.t.y = ((int)m_v.UV.V << (12 >> m_ctxt->TEX0.TH));
			v.t.q = 1<<16;
		}
		else
		{
			// TODO
			v.t.x = m_v.ST.S;
			v.t.y = m_v.ST.T;
			v.t.q = m_v.RGBAQ.Q;
		}
	}

	if(m_pPRIM->FGE)
	{
		v.t.z = (int)m_v.FOG.F << 8;
	}

	__super::VertexKick(fSkip);
}
*/



See more files for this project here

guliverkli

Home of VobSub, Media Player Classic (MPC) and other misc utils.

Project homepage: http://sourceforge.net/projects/guliverkli
Programming language(s): C,C++,PHP
License: other

  res/
    GSdx9.rc2
    hlsl_merge.fx
    hlsl_rb.fx
    hlsl_tfx.fx
    logo1.bmp
    ps11_en00.psh
    ps11_en01.psh
    ps11_en10.psh
    ps11_en11.psh
    ps11_tfx000.psh
    ps11_tfx010.psh
    ps11_tfx011.psh
    ps11_tfx1x0.psh
    ps11_tfx1x1.psh
    ps11_tfx200.psh
    ps11_tfx210.psh
    ps11_tfx211.psh
    ps11_tfx300.psh
    ps11_tfx310.psh
    ps11_tfx311.psh
    ps11_tfx4xx.psh
    ps14_en00.psh
    ps14_en01.psh
    ps14_en10.psh
    ps14_en11.psh
  GS.cpp
  GS.h
  GSCapture.cpp
  GSCapture.h
  GSCaptureDlg.cpp
  GSCaptureDlg.h
  GSHash.cpp
  GSHash.h
  GSLocalMemory.cpp
  GSLocalMemory.h
  GSPerfMon.cpp
  GSPerfMon.h
  GSRegs.cpp
  GSRenderer.cpp
  GSRenderer.h
  GSRendererHW.cpp
  GSRendererHW.h
  GSRendererNull.cpp
  GSRendererNull.h
  GSRendererSoft.cpp
  GSRendererSoft.h
  GSSettingsDlg.cpp
  GSSettingsDlg.h
  GSSoftVertex.cpp
  GSSoftVertex.h
  GSState.cpp
  GSState.h
  GSTables.cpp
  GSTables.h
  GSTextureCache.cpp
  GSTextureCache.h
  GSTransfer.cpp
  GSUtil.cpp
  GSUtil.h
  GSVertexList.cpp
  GSVertexList.h
  GSWnd.cpp
  GSWnd.h
  GSdx9.cpp
  GSdx9.def
  GSdx9.h
  GSdx9.icproj
  GSdx9.rc
  GSdx9.sln
  GSdx9.vcproj
  GSdx9_ic.sln
  GSdx9_vs2005.sln
  GSdx9_vs2005.vcproj
  resource.h
  stdafx.cpp
  stdafx.h
  x86-32.asm
  x86-64.asm
  x86.cpp
  x86.h