diff options
Diffstat (limited to '')
-rw-r--r-- | linden/indra/llmath/llv4matrix4.h | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/linden/indra/llmath/llv4matrix4.h b/linden/indra/llmath/llv4matrix4.h new file mode 100644 index 0000000..38280a2 --- /dev/null +++ b/linden/indra/llmath/llv4matrix4.h | |||
@@ -0,0 +1,251 @@ | |||
1 | /** | ||
2 | * @file llv4matrix4.h | ||
3 | * @brief LLV4* class header file - vector processor enabled math | ||
4 | * | ||
5 | * Copyright (c) 2007-2007, Linden Research, Inc. | ||
6 | * | ||
7 | * Second Life Viewer Source Code | ||
8 | * The source code in this file ("Source Code") is provided by Linden Lab | ||
9 | * to you under the terms of the GNU General Public License, version 2.0 | ||
10 | * ("GPL"), unless you have obtained a separate licensing agreement | ||
11 | * ("Other License"), formally executed by you and Linden Lab. Terms of | ||
12 | * the GPL can be found in doc/GPL-license.txt in this distribution, or | ||
13 | * online at http://secondlife.com/developers/opensource/gplv2 | ||
14 | * | ||
15 | * There are special exceptions to the terms and conditions of the GPL as | ||
16 | * it is applied to this Source Code. View the full text of the exception | ||
17 | * in the file doc/FLOSS-exception.txt in this software distribution, or | ||
18 | * online at http://secondlife.com/developers/opensource/flossexception | ||
19 | * | ||
20 | * By copying, modifying or distributing this software, you acknowledge | ||
21 | * that you have read and understood your obligations described above, | ||
22 | * and agree to abide by those obligations. | ||
23 | * | ||
24 | * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO | ||
25 | * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, | ||
26 | * COMPLETENESS OR PERFORMANCE. | ||
27 | */ | ||
28 | |||
29 | #ifndef LL_LLV4MATRIX4_H | ||
30 | #define LL_LLV4MATRIX4_H | ||
31 | |||
32 | #include "llv4math.h" | ||
33 | #include "llv4matrix3.h" // just for operator LLV4Matrix3() | ||
34 | #include "llv4vector3.h" | ||
35 | |||
36 | //----------------------------------------------------------------------------- | ||
37 | //----------------------------------------------------------------------------- | ||
38 | // LLV4Matrix4 | ||
39 | //----------------------------------------------------------------------------- | ||
40 | //----------------------------------------------------------------------------- | ||
41 | |||
42 | LL_LLV4MATH_ALIGN_PREFIX | ||
43 | |||
44 | class LLV4Matrix4 | ||
45 | { | ||
46 | public: | ||
47 | union { | ||
48 | F32 mMatrix[LLV4_NUM_AXIS][LLV4_NUM_AXIS]; | ||
49 | V4F32 mV[LLV4_NUM_AXIS]; | ||
50 | }; | ||
51 | |||
52 | void lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w); | ||
53 | void multiply(const LLVector3 &a, LLVector3& o) const; | ||
54 | void multiply(const LLVector3 &a, LLV4Vector3& o) const; | ||
55 | |||
56 | const LLV4Matrix4& transpose(); | ||
57 | const LLV4Matrix4& translate(const LLVector3 &vec); | ||
58 | const LLV4Matrix4& translate(const LLV4Vector3 &vec); | ||
59 | const LLV4Matrix4& operator=(const LLMatrix4& a); | ||
60 | |||
61 | operator LLMatrix4() const { return *(reinterpret_cast<const LLMatrix4*>(const_cast<const F32*>(&mMatrix[0][0]))); } | ||
62 | operator LLV4Matrix3() const { return *(reinterpret_cast<const LLV4Matrix3*>(const_cast<const F32*>(&mMatrix[0][0]))); } | ||
63 | |||
64 | friend LLVector3 operator*(const LLVector3 &a, const LLV4Matrix4 &b); | ||
65 | } | ||
66 | |||
67 | LL_LLV4MATH_ALIGN_POSTFIX; | ||
68 | |||
69 | //----------------------------------------------------------------------------- | ||
70 | //----------------------------------------------------------------------------- | ||
71 | // LLV4Matrix4 - SSE | ||
72 | //----------------------------------------------------------------------------- | ||
73 | //----------------------------------------------------------------------------- | ||
74 | |||
75 | #if LL_VECTORIZE | ||
76 | |||
77 | inline void LLV4Matrix4::lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w) | ||
78 | { | ||
79 | __m128 vw = _mm_set1_ps(w); | ||
80 | mV[VX] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VX], a.mV[VX]), vw), a.mV[VX]); // ( b - a ) * w + a | ||
81 | mV[VY] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VY], a.mV[VY]), vw), a.mV[VY]); | ||
82 | mV[VZ] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VZ], a.mV[VZ]), vw), a.mV[VZ]); | ||
83 | mV[VW] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VW], a.mV[VW]), vw), a.mV[VW]); | ||
84 | } | ||
85 | |||
86 | inline void LLV4Matrix4::multiply(const LLVector3 &a, LLVector3& o) const | ||
87 | { | ||
88 | LLV4Vector3 j; | ||
89 | j.v = _mm_add_ps(mV[VW], _mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX])); // ( ax * vx ) + vw | ||
90 | j.v = _mm_add_ps(j.v , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); | ||
91 | j.v = _mm_add_ps(j.v , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); | ||
92 | o.setVec(j.mV); | ||
93 | } | ||
94 | |||
95 | inline void LLV4Matrix4::multiply(const LLVector3 &a, LLV4Vector3& o) const | ||
96 | { | ||
97 | o.v = _mm_add_ps(mV[VW], _mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX])); // ( ax * vx ) + vw | ||
98 | o.v = _mm_add_ps(o.v , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); | ||
99 | o.v = _mm_add_ps(o.v , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); | ||
100 | } | ||
101 | |||
102 | inline const LLV4Matrix4& LLV4Matrix4::translate(const LLV4Vector3 &vec) | ||
103 | { | ||
104 | mV[VW] = _mm_add_ps(mV[VW], vec.v); | ||
105 | return (*this); | ||
106 | } | ||
107 | |||
108 | //----------------------------------------------------------------------------- | ||
109 | //----------------------------------------------------------------------------- | ||
110 | // LLV4Matrix4 - scalar fallback (LL_VECTORIZE disabled) | ||
111 | //----------------------------------------------------------------------------- | ||
112 | //----------------------------------------------------------------------------- | ||
113 | |||
114 | #else | ||
115 | |||
116 | inline void LLV4Matrix4::lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w) | ||
117 | { | ||
118 | mMatrix[VX][VX] = llv4lerp(a.mMatrix[VX][VX], b.mMatrix[VX][VX], w); | ||
119 | mMatrix[VX][VY] = llv4lerp(a.mMatrix[VX][VY], b.mMatrix[VX][VY], w); | ||
120 | mMatrix[VX][VZ] = llv4lerp(a.mMatrix[VX][VZ], b.mMatrix[VX][VZ], w); | ||
121 | |||
122 | mMatrix[VY][VX] = llv4lerp(a.mMatrix[VY][VX], b.mMatrix[VY][VX], w); | ||
123 | mMatrix[VY][VY] = llv4lerp(a.mMatrix[VY][VY], b.mMatrix[VY][VY], w); | ||
124 | mMatrix[VY][VZ] = llv4lerp(a.mMatrix[VY][VZ], b.mMatrix[VY][VZ], w); | ||
125 | |||
126 | mMatrix[VZ][VX] = llv4lerp(a.mMatrix[VZ][VX], b.mMatrix[VZ][VX], w); | ||
127 | mMatrix[VZ][VY] = llv4lerp(a.mMatrix[VZ][VY], b.mMatrix[VZ][VY], w); | ||
128 | mMatrix[VZ][VZ] = llv4lerp(a.mMatrix[VZ][VZ], b.mMatrix[VZ][VZ], w); | ||
129 | |||
130 | mMatrix[VW][VX] = llv4lerp(a.mMatrix[VW][VX], b.mMatrix[VW][VX], w); | ||
131 | mMatrix[VW][VY] = llv4lerp(a.mMatrix[VW][VY], b.mMatrix[VW][VY], w); | ||
132 | mMatrix[VW][VZ] = llv4lerp(a.mMatrix[VW][VZ], b.mMatrix[VW][VZ], w); | ||
133 | } | ||
134 | |||
135 | inline void LLV4Matrix4::multiply(const LLVector3 &a, LLVector3& o) const | ||
136 | { | ||
137 | o.setVec( a.mV[VX] * mMatrix[VX][VX] + | ||
138 | a.mV[VY] * mMatrix[VY][VX] + | ||
139 | a.mV[VZ] * mMatrix[VZ][VX] + | ||
140 | mMatrix[VW][VX], | ||
141 | |||
142 | a.mV[VX] * mMatrix[VX][VY] + | ||
143 | a.mV[VY] * mMatrix[VY][VY] + | ||
144 | a.mV[VZ] * mMatrix[VZ][VY] + | ||
145 | mMatrix[VW][VY], | ||
146 | |||
147 | a.mV[VX] * mMatrix[VX][VZ] + | ||
148 | a.mV[VY] * mMatrix[VY][VZ] + | ||
149 | a.mV[VZ] * mMatrix[VZ][VZ] + | ||
150 | mMatrix[VW][VZ]); | ||
151 | } | ||
152 | |||
153 | inline void LLV4Matrix4::multiply(const LLVector3 &a, LLV4Vector3& o) const | ||
154 | { | ||
155 | o.setVec( a.mV[VX] * mMatrix[VX][VX] + | ||
156 | a.mV[VY] * mMatrix[VY][VX] + | ||
157 | a.mV[VZ] * mMatrix[VZ][VX] + | ||
158 | mMatrix[VW][VX], | ||
159 | |||
160 | a.mV[VX] * mMatrix[VX][VY] + | ||
161 | a.mV[VY] * mMatrix[VY][VY] + | ||
162 | a.mV[VZ] * mMatrix[VZ][VY] + | ||
163 | mMatrix[VW][VY], | ||
164 | |||
165 | a.mV[VX] * mMatrix[VX][VZ] + | ||
166 | a.mV[VY] * mMatrix[VY][VZ] + | ||
167 | a.mV[VZ] * mMatrix[VZ][VZ] + | ||
168 | mMatrix[VW][VZ]); | ||
169 | } | ||
170 | |||
171 | inline const LLV4Matrix4& LLV4Matrix4::translate(const LLV4Vector3 &vec) | ||
172 | { | ||
173 | mMatrix[3][0] += vec.mV[0]; | ||
174 | mMatrix[3][1] += vec.mV[1]; | ||
175 | mMatrix[3][2] += vec.mV[2]; | ||
176 | return (*this); | ||
177 | } | ||
178 | |||
179 | //----------------------------------------------------------------------------- | ||
180 | //----------------------------------------------------------------------------- | ||
181 | // LLV4Matrix4 | ||
182 | //----------------------------------------------------------------------------- | ||
183 | //----------------------------------------------------------------------------- | ||
184 | |||
185 | #endif | ||
186 | |||
187 | inline const LLV4Matrix4& LLV4Matrix4::operator=(const LLMatrix4& a) | ||
188 | { | ||
189 | memcpy(mMatrix, a.mMatrix, sizeof(F32) * 16 ); | ||
190 | return *this; | ||
191 | } | ||
192 | |||
193 | inline const LLV4Matrix4& LLV4Matrix4::transpose() | ||
194 | { | ||
195 | #if LL_VECTORIZE && defined(_MM_TRANSPOSE4_PS) | ||
196 | _MM_TRANSPOSE4_PS(mV[VX], mV[VY], mV[VZ], mV[VW]); | ||
197 | #else | ||
198 | LLV4Matrix4 mat; | ||
199 | mat.mMatrix[0][0] = mMatrix[0][0]; | ||
200 | mat.mMatrix[1][0] = mMatrix[0][1]; | ||
201 | mat.mMatrix[2][0] = mMatrix[0][2]; | ||
202 | mat.mMatrix[3][0] = mMatrix[0][3]; | ||
203 | |||
204 | mat.mMatrix[0][1] = mMatrix[1][0]; | ||
205 | mat.mMatrix[1][1] = mMatrix[1][1]; | ||
206 | mat.mMatrix[2][1] = mMatrix[1][2]; | ||
207 | mat.mMatrix[3][1] = mMatrix[1][3]; | ||
208 | |||
209 | mat.mMatrix[0][2] = mMatrix[2][0]; | ||
210 | mat.mMatrix[1][2] = mMatrix[2][1]; | ||
211 | mat.mMatrix[2][2] = mMatrix[2][2]; | ||
212 | mat.mMatrix[3][2] = mMatrix[2][3]; | ||
213 | |||
214 | mat.mMatrix[0][3] = mMatrix[3][0]; | ||
215 | mat.mMatrix[1][3] = mMatrix[3][1]; | ||
216 | mat.mMatrix[2][3] = mMatrix[3][2]; | ||
217 | mat.mMatrix[3][3] = mMatrix[3][3]; | ||
218 | |||
219 | *this = mat; | ||
220 | #endif | ||
221 | return *this; | ||
222 | } | ||
223 | |||
224 | inline const LLV4Matrix4& LLV4Matrix4::translate(const LLVector3 &vec) | ||
225 | { | ||
226 | mMatrix[3][0] += vec.mV[0]; | ||
227 | mMatrix[3][1] += vec.mV[1]; | ||
228 | mMatrix[3][2] += vec.mV[2]; | ||
229 | return (*this); | ||
230 | } | ||
231 | |||
232 | inline LLVector3 operator*(const LLVector3 &a, const LLV4Matrix4 &b) | ||
233 | { | ||
234 | return LLVector3(a.mV[VX] * b.mMatrix[VX][VX] + | ||
235 | a.mV[VY] * b.mMatrix[VY][VX] + | ||
236 | a.mV[VZ] * b.mMatrix[VZ][VX] + | ||
237 | b.mMatrix[VW][VX], | ||
238 | |||
239 | a.mV[VX] * b.mMatrix[VX][VY] + | ||
240 | a.mV[VY] * b.mMatrix[VY][VY] + | ||
241 | a.mV[VZ] * b.mMatrix[VZ][VY] + | ||
242 | b.mMatrix[VW][VY], | ||
243 | |||
244 | a.mV[VX] * b.mMatrix[VX][VZ] + | ||
245 | a.mV[VY] * b.mMatrix[VY][VZ] + | ||
246 | a.mV[VZ] * b.mMatrix[VZ][VZ] + | ||
247 | b.mMatrix[VW][VZ]); | ||
248 | } | ||
249 | |||
250 | |||
251 | #endif | ||