Code Search for Developers
 
 
  

GSSoftVertex.h from guliverkli at Krugle


Show GSSoftVertex.h syntax highlighted

/* 
 *	Copyright (C) 2003-2005 Gabest
 *	http://www.gabest.org
 *
 *  This Program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *   
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *   
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *  http://www.gnu.org/copyleft/gpl.html
 *
 */

#pragma once

//
// GSSoftVertexFP
//

// 128-bit integer constants read (reinterpreted as packed floats) by
// GSSoftVertexFP::Vector::floor(). They are only declared here; the
// definitions must come from another translation unit.
// Presumably every 32-bit lane holds the hex value spelled in the name
// (0x80000000 = float sign bit, 0x4b000000 = 2^23 as a float,
// 0x3f800000 = 1.0f) -- TODO confirm against the definition.
extern const __m128i _80000000, _4b000000, _3f800000;

// Vertex record made of three 4-component float vectors (c, p, t) that can
// also be addressed as an array of three vectors (sv) or twelve scalars (s).
// p is used as the position (see operator CPoint / GetZ); c and t are
// presumably colour and texture coordinate -- confirm against the callers.
//
// When the compiler targets SSE2 (_M_IX86_FP >= 2) or x64 (_M_AMD64) the
// vector math runs on __m128 values, otherwise a component-wise scalar
// fallback with the same interface is compiled.
__declspec(align(16)) union GSSoftVertexFP
{
	// One float wrapped in a class so that scalars and vectors expose the same
	// set of operations (sat, rcp, shifts, arithmetic).
	class __declspec(novtable) Scalar
	{
		float val;

	public:
		// Note: default construction leaves the value uninitialized.
		Scalar() {}
		explicit Scalar(float f) {val = f;}
		explicit Scalar(int i) {val = (float)i;}

		float Value() const {return val;}

#if _M_IX86_FP >= 2 || defined(_M_AMD64)
		// Clamp to [0, 255].
		void sat() {_mm_store_ss(&val, _mm_min_ss(_mm_max_ss(_mm_set_ss(val), _mm_setzero_ps()), _mm_set_ss(255)));}
		// Reciprocal via _mm_rcp_ss, i.e. an approximation rather than an exact 1/val.
		void rcp() {_mm_store_ss(&val, _mm_rcp_ss(_mm_set_ss(val)));}
#else
		// Clamp to [0, 255].
		void sat() {val = val < 0 ? 0 : val > 255 ? 255 : val;}
		// Exact reciprocal (the SSE build uses an approximation instead).
		void rcp() {val = 1.0f / val;}
#endif
		void abs() {val = fabs(val);}

		Scalar floor_s() const {return Scalar(floor(val));}
		int floor_i() const {return (int)floor(val);}

		// ceil(x) computed as -floor(-x).
		Scalar ceil_s() const {return Scalar(-floor(-val));}
		int ceil_i() const {return -(int)floor(-val);}

		void operator = (float f) {val = f;}
		void operator = (int i) {val = (float)i;}

		operator float() const {return val;}
		// Truncates toward zero.
		operator int() const {return (int)val;}

		void operator += (const Scalar& s) {val += s.val;}
		void operator -= (const Scalar& s) {val -= s.val;}
		void operator *= (const Scalar& s) {val *= s.val;}
		void operator /= (const Scalar& s) {val /= s.val;}

		friend Scalar operator + (const Scalar& s1, const Scalar& s2) {return Scalar(s1.val + s2.val);}
		friend Scalar operator - (const Scalar& s1, const Scalar& s2) {return Scalar(s1.val - s2.val);}
		friend Scalar operator * (const Scalar& s1, const Scalar& s2) {return Scalar(s1.val * s2.val);}
		friend Scalar operator / (const Scalar& s1, const Scalar& s2) {return Scalar(s1.val / s2.val);}

		friend Scalar operator + (const Scalar& s, int i) {return Scalar(s.val + i);}
		friend Scalar operator - (const Scalar& s, int i) {return Scalar(s.val - i);}
		friend Scalar operator * (const Scalar& s, int i) {return Scalar(s.val * i);}
		friend Scalar operator / (const Scalar& s, int i) {return Scalar(s.val / i);}

		// "Shifts" are multiplication / division by 2^i so that the float type
		// mirrors fixed-point style call sites.
		friend Scalar operator << (const Scalar& s, int i) {return Scalar(s.val * (1<<i));}
		friend Scalar operator >> (const Scalar& s, int i) {return Scalar(s.val / (1<<i));}

		friend bool operator == (const Scalar& s1, const Scalar& s2) {return s1.val == s2.val;}
		friend bool operator <= (const Scalar& s1, const Scalar& s2) {return s1.val <= s2.val;}
		friend bool operator < (const Scalar& s1, const Scalar& s2) {return s1.val < s2.val;}
	};

	// Four Scalars addressable as x/y/z/q, r/g/b/a, v[4] or c[4]; in the SSE
	// build the same storage is also visible as one __m128 (xyzq / rgba).
	__declspec(align(16)) class __declspec(novtable) Vector
	{
	public:
		union
		{
			union {struct {Scalar x, y, z, q;}; struct {Scalar r, g, b, a;};};
			union {struct {Scalar v[4];}; struct {Scalar c[4];};};
#if _M_IX86_FP >= 2 || defined(_M_AMD64)
			union {__m128 xyzq; __m128 rgba;};
#endif
		};

		// Note: default construction leaves all components uninitialized.
		Vector() {}
		Vector(const Vector& v) {*this = v;}
		// Broadcasts s into all four components.
		Vector(Scalar s) {*this = s;}
		Vector(Scalar s0, Scalar s1, Scalar s2, Scalar s3) {x = s0; y = s1; z = s2; q = s3;}
		// Unpacks four bytes (lowest byte first) into x, y, z, q.
		explicit Vector(DWORD dw) {*this = dw;}
#if _M_IX86_FP >= 2 || defined(_M_AMD64)
		Vector(__m128 f0123) {*this = f0123;}
#endif

#if _M_IX86_FP >= 2 || defined(_M_AMD64)

		void operator = (const Vector& v) {xyzq = v.xyzq;}
		void operator = (Scalar s) {xyzq = _mm_set1_ps(s);}

		void operator = (__m128 f0123) {xyzq = f0123;}
		operator __m128() const {return xyzq;}

		// Zero-extends each byte of dw to 32 bits, then converts to float.
		void operator = (DWORD dw) {__m128i zero = _mm_setzero_si128(); xyzq = _mm_cvtepi32_ps(_mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(dw), zero), zero));}
		// Truncates each component to int and packs to four bytes; the pack
		// instructions saturate, so out-of-range values clamp to 0..255.
		operator DWORD() const {__m128i r0 = _mm_cvttps_epi32(xyzq); r0 = _mm_packs_epi32(r0, r0); r0 = _mm_packus_epi16(r0, r0); return (DWORD)_mm_cvtsi128_si32(r0);}
		// Truncates each component to int and packs to four signed-saturated 16-bit fields.
		operator UINT64() const {__m128i r0 = _mm_cvttps_epi32(xyzq); r0 = _mm_packs_epi32(r0, r0); return *(UINT64*)&r0;}

		void sat() {xyzq = _mm_min_ps(_mm_max_ps(xyzq, _mm_setzero_ps()), _mm_set1_ps(255));}
		// Approximate reciprocal (_mm_rcp_ps).
		void rcp() {xyzq = _mm_rcp_ps(xyzq);}

		// Branch-free per-component floor. It relies on the extern constants
		// holding the bit patterns their names suggest (sign mask, 2^23, 1.0f --
		// TODO confirm) and is presumably only meaningful while |x| < 2^23.
		Vector floor() const
		{
			// Sign bit of every component.
			__m128 sign = _mm_and_ps(xyzq, *(const __m128*)&_80000000);
			// Large constant carrying the component's own sign.
			__m128 r0 = _mm_or_ps(sign, *(const __m128*)&_4b000000);
			// Adding then subtracting the constant drops the fraction bits,
			// leaving the value rounded to an integer.
			__m128 r1 = _mm_sub_ps(_mm_add_ps(xyzq, r0), r0);
			// Positive where the rounding went upwards...
			__m128 r2 = _mm_sub_ps(r1, xyzq);
			// ...in which case 1.0 has to be taken off again.
			__m128 r3 = _mm_and_ps(_mm_cmpnle_ps(r2, sign), *(const __m128*)&_3f800000);
			__m128 r4 = _mm_sub_ps(r1, r3);
			return r4;
		}

		void operator += (const Vector& v) {xyzq = _mm_add_ps(xyzq, v);}
		void operator -= (const Vector& v) {xyzq = _mm_sub_ps(xyzq, v);}
		void operator *= (const Vector& v) {xyzq = _mm_mul_ps(xyzq, v);}
		void operator /= (const Vector& v) {xyzq = _mm_div_ps(xyzq, v);}

#else

		void operator = (const Vector& v) {x = v.x; y = v.y; z = v.z; q = v.q;}
		void operator = (Scalar s) {x = y = z = q = s;}

		void operator = (DWORD dw)
		{
			x = Scalar((int)((dw>>0)&0xff));
			y = Scalar((int)((dw>>8)&0xff));
			z = Scalar((int)((dw>>16)&0xff));
			q = Scalar((int)((dw>>24)&0xff));
		}

		// Packs the truncated components into four bytes. Unlike the SSE
		// build, out-of-range values are masked rather than saturated.
		operator DWORD() const
		{
			return (DWORD)(
				(((DWORD)(int)x&0xff)<<0) |
				(((DWORD)(int)y&0xff)<<8) |
				(((DWORD)(int)z&0xff)<<16) |
				(((DWORD)(int)q&0xff)<<24));
		}

		// Packs the truncated components into four 16-bit fields.
		// The result must stay 64 bits wide: narrowing it to DWORD would
		// throw away the z and q fields stored in bits 32..63.
		operator UINT64() const
		{
			return (UINT64)(
				(((UINT64)(int)x&0xffff)<<0) |
				(((UINT64)(int)y&0xffff)<<16) |
				(((UINT64)(int)z&0xffff)<<32) |
				(((UINT64)(int)q&0xffff)<<48));
		}

		void sat() {x.sat(); y.sat(); z.sat(); q.sat();}
		void rcp() {x.rcp(); y.rcp(); z.rcp(); q.rcp();}
		
		Vector floor() const {return Vector(x.floor_s(), y.floor_s(), z.floor_s(), q.floor_s());}

		void operator += (const Vector& v) {*this = *this + v;}
		void operator -= (const Vector& v) {*this = *this - v;}
		void operator *= (const Vector& v) {*this = *this * v;}
		void operator /= (const Vector& v) {*this = *this / v;}

#endif

		// Defined after the union, once for the SSE build and once for the scalar build.
		friend Vector operator + (const Vector& v1, const Vector& v2);
		friend Vector operator - (const Vector& v1, const Vector& v2);
		friend Vector operator * (const Vector& v1, const Vector& v2);
		friend Vector operator / (const Vector& v1, const Vector& v2);

		friend Vector operator + (const Vector& v, Scalar s);
		friend Vector operator - (const Vector& v, Scalar s);
		friend Vector operator * (const Vector& v, Scalar s);
		friend Vector operator / (const Vector& v, Scalar s);
	};

	// Three overlapping views of the same storage.
	struct {__declspec(align(16)) Vector c, p, t;};
	struct {__declspec(align(16)) Vector sv[3];};
	struct {__declspec(align(16)) Scalar s[12];};

	// Note: default construction leaves all components uninitialized.
	GSSoftVertexFP() {}
	GSSoftVertexFP(const GSSoftVertexFP& v) {*this = v;}

	void operator = (const GSSoftVertexFP& v) {c = v.c; p = v.p; t = v.t;}
	void operator += (const GSSoftVertexFP& v) {c += v.c; p += v.p; t += v.t;}

	// Integer (truncated) x/y of the position.
	operator CPoint() const {return CPoint((int)p.x, (int)p.y);}

	// Builds a 32-bit depth value from the position:
	//   (int(p.z) << 16) + int(frac(p.z) * 65536 + p.q)
	// Both p.z and p.q are expected to be non-negative.
	// NOTE(review): the SSE path converts the low part with _mm_cvtss_si32
	// (rounds according to the current rounding mode) while the scalar path
	// truncates, so the two builds can differ by one in the lowest bit.
	__forceinline DWORD GetZ() const 
	{
		ASSERT((float)p.z >= 0 && (float)p.q >= 0);
#if _M_IX86_FP >= 2 || defined(_M_AMD64)
		__m128 z = _mm_shuffle_ps(p, p, _MM_SHUFFLE(2,2,2,2));
		__m128 q = _mm_shuffle_ps(p, p, _MM_SHUFFLE(3,3,3,3));
		// TODO: check if our floor is faster than doing ss->si->ss
		int zh = _mm_cvttss_si32(z);
		// Seed with zero: only the low lane is used, but initializing the
		// variable from itself would read an uninitialized register.
		__m128 zhi = _mm_cvtsi32_ss(_mm_setzero_ps(), zh);
		__m128 zhf = _mm_mul_ss(_mm_sub_ss(z, zhi), _mm_set_ss(65536));
		int zl = _mm_cvtss_si32(_mm_add_ss(zhf, q));
		return ((DWORD)zh << 16) + (DWORD)zl;
#else
		// return ((DWORD)(int)p.z << 16) + (DWORD)(int)((p.z - p.z.floor_s())*65536 + p.q);

		int z = (int)p.z;
		return ((DWORD)z << 16) + (DWORD)(((float)p.z - z)*65536 + (float)p.q);
#endif
	}

};

// Free arithmetic operators for GSSoftVertexFP::Vector.
// Both build variants provide the same set: vector (op) vector,
// vector (op) scalar (the scalar is applied to every component) and
// << / >> which scale every component by 2^i or 1/2^i.
#if _M_IX86_FP >= 2 || defined(_M_AMD64)

// ---- SSE build: one packed instruction per operator ----

__forceinline GSSoftVertexFP::Vector operator + (const GSSoftVertexFP::Vector& v1, const GSSoftVertexFP::Vector& v2)
{
	return GSSoftVertexFP::Vector(_mm_add_ps(v1, v2));
}

__forceinline GSSoftVertexFP::Vector operator - (const GSSoftVertexFP::Vector& v1, const GSSoftVertexFP::Vector& v2)
{
	return GSSoftVertexFP::Vector(_mm_sub_ps(v1, v2));
}

__forceinline GSSoftVertexFP::Vector operator * (const GSSoftVertexFP::Vector& v1, const GSSoftVertexFP::Vector& v2)
{
	return GSSoftVertexFP::Vector(_mm_mul_ps(v1, v2));
}

__forceinline GSSoftVertexFP::Vector operator / (const GSSoftVertexFP::Vector& v1, const GSSoftVertexFP::Vector& v2)
{
	return GSSoftVertexFP::Vector(_mm_div_ps(v1, v2));
}

__forceinline GSSoftVertexFP::Vector operator + (const GSSoftVertexFP::Vector& v, GSSoftVertexFP::Scalar s)
{
	const __m128 ssss = _mm_set1_ps(s);
	return GSSoftVertexFP::Vector(_mm_add_ps(v, ssss));
}

__forceinline GSSoftVertexFP::Vector operator - (const GSSoftVertexFP::Vector& v, GSSoftVertexFP::Scalar s)
{
	const __m128 ssss = _mm_set1_ps(s);
	return GSSoftVertexFP::Vector(_mm_sub_ps(v, ssss));
}

__forceinline GSSoftVertexFP::Vector operator * (const GSSoftVertexFP::Vector& v, GSSoftVertexFP::Scalar s)
{
	const __m128 ssss = _mm_set1_ps(s);
	return GSSoftVertexFP::Vector(_mm_mul_ps(v, ssss));
}

__forceinline GSSoftVertexFP::Vector operator / (const GSSoftVertexFP::Vector& v, GSSoftVertexFP::Scalar s)
{
	const __m128 ssss = _mm_set1_ps(s);
	return GSSoftVertexFP::Vector(_mm_div_ps(v, ssss));
}

// Multiply every component by 2^i.
__forceinline GSSoftVertexFP::Vector operator << (const GSSoftVertexFP::Vector& v, int i)
{
	const float scale = (float)(1 << i);
	return GSSoftVertexFP::Vector(_mm_mul_ps(v, _mm_set1_ps(scale)));
}

// Multiply every component by 1 / 2^i.
__forceinline GSSoftVertexFP::Vector operator >> (const GSSoftVertexFP::Vector& v, int i)
{
	const float scale = 1.0f / (1 << i);
	return GSSoftVertexFP::Vector(_mm_mul_ps(v, _mm_set1_ps(scale)));
}

#else

// ---- Scalar build: the Scalar operators are applied per component ----

__forceinline GSSoftVertexFP::Vector operator + (const GSSoftVertexFP::Vector& v1, const GSSoftVertexFP::Vector& v2)
{
	return GSSoftVertexFP::Vector(
		v1.x + v2.x,
		v1.y + v2.y,
		v1.z + v2.z,
		v1.q + v2.q);
}

__forceinline GSSoftVertexFP::Vector operator - (const GSSoftVertexFP::Vector& v1, const GSSoftVertexFP::Vector& v2)
{
	return GSSoftVertexFP::Vector(
		v1.x - v2.x,
		v1.y - v2.y,
		v1.z - v2.z,
		v1.q - v2.q);
}

__forceinline GSSoftVertexFP::Vector operator * (const GSSoftVertexFP::Vector& v1, const GSSoftVertexFP::Vector& v2)
{
	return GSSoftVertexFP::Vector(
		v1.x * v2.x,
		v1.y * v2.y,
		v1.z * v2.z,
		v1.q * v2.q);
}

__forceinline GSSoftVertexFP::Vector operator / (const GSSoftVertexFP::Vector& v1, const GSSoftVertexFP::Vector& v2)
{
	return GSSoftVertexFP::Vector(
		v1.x / v2.x,
		v1.y / v2.y,
		v1.z / v2.z,
		v1.q / v2.q);
}

__forceinline GSSoftVertexFP::Vector operator + (const GSSoftVertexFP::Vector& v, GSSoftVertexFP::Scalar s)
{
	return GSSoftVertexFP::Vector(
		v.x + s,
		v.y + s,
		v.z + s,
		v.q + s);
}

__forceinline GSSoftVertexFP::Vector operator - (const GSSoftVertexFP::Vector& v, GSSoftVertexFP::Scalar s)
{
	return GSSoftVertexFP::Vector(
		v.x - s,
		v.y - s,
		v.z - s,
		v.q - s);
}

__forceinline GSSoftVertexFP::Vector operator * (const GSSoftVertexFP::Vector& v, GSSoftVertexFP::Scalar s)
{
	return GSSoftVertexFP::Vector(
		v.x * s,
		v.y * s,
		v.z * s,
		v.q * s);
}

__forceinline GSSoftVertexFP::Vector operator / (const GSSoftVertexFP::Vector& v, GSSoftVertexFP::Scalar s)
{
	return GSSoftVertexFP::Vector(
		v.x / s,
		v.y / s,
		v.z / s,
		v.q / s);
}

// Multiply every component by 2^i (via Scalar's operator <<).
__forceinline GSSoftVertexFP::Vector operator << (const GSSoftVertexFP::Vector& v, int i)
{
	return GSSoftVertexFP::Vector(
		v.x << i,
		v.y << i,
		v.z << i,
		v.q << i);
}

// Divide every component by 2^i (via Scalar's operator >>).
__forceinline GSSoftVertexFP::Vector operator >> (const GSSoftVertexFP::Vector& v, int i)
{
	return GSSoftVertexFP::Vector(
		v.x >> i,
		v.y >> i,
		v.z >> i,
		v.q >> i);
}

#endif

//

// Member-wise sum of two vertices: the c, p and t members of the result are
// the sums of the corresponding members of v1 and v2.
// Works for any Vertex type that is default-constructible and exposes
// addable c, p and t members.
template <class Vertex>
__forceinline Vertex operator + (const Vertex& v1, const Vertex& v2)
{
	Vertex sum;

	sum.c = v1.c + v2.c;
	sum.p = v1.p + v2.p;
	sum.t = v1.t + v2.t;

	return sum;
}

// Member-wise difference of two vertices: the c, p and t members of the
// result are v1's members minus the corresponding members of v2.
// Works for any Vertex type that is default-constructible and exposes
// subtractable c, p and t members.
template <class Vertex>
__forceinline Vertex operator - (const Vertex& v1, const Vertex& v2)
{
	Vertex diff;

	diff.c = v1.c - v2.c;
	diff.p = v1.p - v2.p;
	diff.t = v1.t - v2.t;

	return diff;
}

// Scales a vertex: every component of c, p and t is multiplied by s.
// The scalar is first broadcast into a Vertex::Vector so that the
// vector-by-vector product can be reused for all three members.
template <class Vertex>
__forceinline Vertex operator * (const Vertex& v, typename Vertex::Scalar s)
{
	Vertex v0;
	// Vertex::Vector is a dependent name, so standard C++ requires 'typename'
	// for it to be parsed as a type.
	typename Vertex::Vector vs(s);
	v0.c = v.c * vs;
	v0.p = v.p * vs;
	v0.t = v.t * vs;
	return v0;
}

// Divides a vertex by a scalar: every component of c, p and t is divided by s.
// The scalar is first broadcast into a Vertex::Vector so that the
// vector-by-vector quotient can be reused for all three members.
template <class Vertex>
__forceinline Vertex operator / (const Vertex& v, typename Vertex::Scalar s)
{
	Vertex v0;
	// Vertex::Vector is a dependent name, so standard C++ requires 'typename'
	// for it to be parsed as a type.
	typename Vertex::Vector vs(s);
	v0.c = v.c / vs;
	v0.p = v.p / vs;
	v0.t = v.t / vs;
	return v0;
}

// Swaps the c, p and t members of two vertices in place.
// The pointers are declared RESTRICT, so v1 and v2 must refer to
// different objects.
template <class Vertex>
__forceinline void Exchange(Vertex* RESTRICT v1, Vertex* RESTRICT v2)
{
	typename Vertex::Vector held = v1->c;
	v1->c = v2->c;
	v2->c = held;

	held = v1->p;
	v1->p = v2->p;
	v2->p = held;

	held = v1->t;
	v1->t = v2->t;
	v2->t = held;
}




See more files for this project here

guliverkli

Home of VobSub, Media Player Classic (MPC) and other misc utils.

Project homepage: http://sourceforge.net/projects/guliverkli
Programming language(s): C,C++,PHP
License: other

  res/
    GSdx9.rc2
    hlsl_merge.fx
    hlsl_rb.fx
    hlsl_tfx.fx
    logo1.bmp
    ps11_en00.psh
    ps11_en01.psh
    ps11_en10.psh
    ps11_en11.psh
    ps11_tfx000.psh
    ps11_tfx010.psh
    ps11_tfx011.psh
    ps11_tfx1x0.psh
    ps11_tfx1x1.psh
    ps11_tfx200.psh
    ps11_tfx210.psh
    ps11_tfx211.psh
    ps11_tfx300.psh
    ps11_tfx310.psh
    ps11_tfx311.psh
    ps11_tfx4xx.psh
    ps14_en00.psh
    ps14_en01.psh
    ps14_en10.psh
    ps14_en11.psh
  GS.cpp
  GS.h
  GSCapture.cpp
  GSCapture.h
  GSCaptureDlg.cpp
  GSCaptureDlg.h
  GSHash.cpp
  GSHash.h
  GSLocalMemory.cpp
  GSLocalMemory.h
  GSPerfMon.cpp
  GSPerfMon.h
  GSRegs.cpp
  GSRenderer.cpp
  GSRenderer.h
  GSRendererHW.cpp
  GSRendererHW.h
  GSRendererNull.cpp
  GSRendererNull.h
  GSRendererSoft.cpp
  GSRendererSoft.h
  GSSettingsDlg.cpp
  GSSettingsDlg.h
  GSSoftVertex.cpp
  GSSoftVertex.h
  GSState.cpp
  GSState.h
  GSTables.cpp
  GSTables.h
  GSTextureCache.cpp
  GSTextureCache.h
  GSTransfer.cpp
  GSUtil.cpp
  GSUtil.h
  GSVertexList.cpp
  GSVertexList.h
  GSWnd.cpp
  GSWnd.h
  GSdx9.cpp
  GSdx9.def
  GSdx9.h
  GSdx9.icproj
  GSdx9.rc
  GSdx9.sln
  GSdx9.vcproj
  GSdx9_ic.sln
  GSdx9_vs2005.sln
  GSdx9_vs2005.vcproj
  resource.h
  stdafx.cpp
  stdafx.h
  x86-32.asm
  x86-64.asm
  x86.cpp
  x86.h