openMSX
gl_mat.hh
Go to the documentation of this file.
1 #ifndef GL_MAT_HH
2 #define GL_MAT_HH
3 
4 // This code implements a mathematical matrix, comparable in functionality
5 // and syntax to the matrix types in GLSL.
6 //
7 // The code was tuned for 4x4 matrixes, but when it didn't complicate the
8 // code, other sizes (even non-square) are supported as well.
9 //
10 // The functionality of this code is focused on openGL(ES). So only the
11 // operation 'matrix x column-vector' is supported and not e.g. 'row-vector x
12 // matrix'. The internal layout is also compatible with openGL(ES).
13 //
14 // This code itself doesn't have many SSE optimizations, but because it is built
15 // on top of an optimized vector type the generated code for 4x4 matrix
16 // multiplication is as efficient as a hand-written SSE assembly routine
17 // (verified with gcc-4.8 on x86_64). The efficiency for other matrix sizes was
18 // not a goal for this code (smaller sizes might be ok, bigger sizes most
19 // likely not).
20 
21 #include "gl_vec.hh"
22 #include <cassert>
23 
24 namespace gl {
25 
26 // Matrix with M rows and N columns (M by N), elements have type T.
27 // Internally elements are stored in column-major order. This is the same
28 // convention as openGL (and Fortran), but different from the typical row-major
29 // order in C++. This allows to directly pass these matrices to openGL(ES).
30 template<int M, int N, typename T> class matMxN
31 {
32 public:
33  // Default copy-constructor and assignment operator.
34 
35  // Construct identity matrix.
37  {
38  for (int i = 0; i < N; ++i) {
39  for (int j = 0; j < M; ++j) {
40  c[i][j] = (i == j) ? T(1) : T(0);
41  }
42  }
43  }
44 
45  // Construct diagonal matrix.
46  explicit matMxN(const vecN<(M < N ? M : N), T>& d)
47  {
48  for (int i = 0; i < N; ++i) {
49  for (int j = 0; j < M; ++j) {
50  c[i][j] = (i == j) ? d[i] : T(0);
51  }
52  }
53  }
54 
55  // Construct from larger matrix (higher order elements are dropped).
56  template<int M2, int N2> explicit matMxN(const matMxN<M2, N2, T>& x)
57  {
58  static_assert(((M2 > M) && (N2 >= N)) || ((M2 >= M) && (N2 > N)),
59  "matrix must have strictly larger dimensions");
60  for (int i = 0; i < N; ++i) c[i] = vecN<M, T>(x[i]);
61  }
62 
63  // Construct matrix from 2 given columns (only valid when N == 2).
64  matMxN(const vecN<M, T>& x, const vecN<M, T>& y)
65  {
66  static_assert(N == 2, "wrong #constructor arguments");
67  c[0] = x; c[1] = y;
68  }
69 
70  // Construct matrix from 3 given columns (only valid when N == 3).
71  matMxN(const vecN<M, T>& x, const vecN<M, T>& y, const vecN<M, T>& z)
72  {
73  static_assert(N == 3, "wrong #constructor arguments");
74  c[0] = x; c[1] = y; c[2] = z;
75  }
76 
77  // Construct matrix from 4 given columns (only valid when N == 4).
78  matMxN(const vecN<M, T>& x, const vecN<M, T>& y,
79  const vecN<M, T>& z, const vecN<M, T>& w)
80  {
81  static_assert(N == 4, "wrong #constructor arguments");
82  c[0] = x; c[1] = y; c[2] = z; c[3] = w;
83  }
84 
85  // Access the i-the column of the matrix.
86  // Vectors are also indexable, so 'A[i][j]' returns the element
87  // at the i-th column, j-th row.
88  const vecN<M, T>& operator[](unsigned i) const {
89  #ifdef DEBUG
90  assert(i < N);
91  #endif
92  return c[i];
93  }
94  vecN<M, T>& operator[](unsigned i) {
95  #ifdef DEBUG
96  assert(i < N);
97  #endif
98  return c[i];
99  }
100 
101  // Assignment version of the +,-,* operations defined below.
102  matMxN& operator+=(const matMxN& x) { *this = *this + x; return *this; }
103  matMxN& operator-=(const matMxN& x) { *this = *this - x; return *this; }
104  matMxN& operator*=(T x) { *this = *this * x; return *this; }
105  matMxN& operator*=(const matMxN<N, N, T>& x) { *this = *this * x; return *this; }
106 
107 private:
108  vecN<M, T> c[N];
109 };
110 
111 
112 // Specialization for 4x4 matrix.
113 // This specialization is not needed for correctness, but it does help gcc-4.8
114 // to generate better code for the constructors (clang-3.4 doesn't need help).
115 // Maybe remove this specialization in the future?
116 template<typename T> class matMxN<4, 4, T>
117 {
118 public:
120  {
121  T one(1); T zero(0);
122  c[0] = vecN<4,T>( one, zero, zero, zero);
123  c[1] = vecN<4,T>(zero, one, zero, zero);
124  c[2] = vecN<4,T>(zero, zero, one, zero);
125  c[3] = vecN<4,T>(zero, zero, zero, one);
126  }
127 
128  explicit matMxN(vecN<4, T> d)
129  {
130  T zero(0);
131  c[0] = vecN<4,T>(d[0], zero, zero, zero);
132  c[1] = vecN<4,T>(zero, d[1], zero, zero);
133  c[2] = vecN<4,T>(zero, zero, d[2], zero);
134  c[3] = vecN<4,T>(zero, zero, zero, d[3]);
135  }
136 
138  {
139  c[0] = x; c[1] = y; c[2] = z; c[3] = w;
140  }
141 
142  vecN<4, T> operator[](int i) const { return c[i]; }
143  vecN<4, T>& operator[](int i) { return c[i]; }
144 
145  // Assignment version of the +,-,* operations defined below.
146  matMxN& operator+=(const matMxN& x) { *this = *this + x; return *this; }
147  matMxN& operator-=(const matMxN& x) { *this = *this - x; return *this; }
148  matMxN& operator*=(T x) { *this = *this * x; return *this; }
149  matMxN& operator*=(const matMxN& x) { *this = *this * x; return *this; }
150 
151 private:
152  vecN<4, T> c[4];
153 };
154 
155 
156 // Convenience typedefs (same names as used by GLSL).
160 
161 
162 // -- Matrix functions --
163 
164 // matrix equality / inequality
165 template<int M, int N, typename T>
166 inline bool operator==(const matMxN<M, N, T>& A, const matMxN<M, N, T>& B)
167 {
168  for (int i = 0; i < N; ++i) if (A[i] != B[i]) return false;
169  return true;
170 }
171 template<int M, int N, typename T>
172 inline bool operator!=(const matMxN<M, N, T>& A, const matMxN<M, N, T>& B)
173 {
174  return !(A == B);
175 }
176 
177 // matrix + matrix
178 template<int M, int N, typename T>
180 {
181  matMxN<M, N, T> R;
182  for (int i = 0; i < N; ++i) R[i] = A[i] + B[i];
183  return R;
184 }
185 
186 // matrix - matrix
187 template<int M, int N, typename T>
189 {
190  matMxN<M, N, T> R;
191  for (int i = 0; i < N; ++i) R[i] = A[i] - B[i];
192  return R;
193 }
194 
195 // matrix negation
196 template<int M, int N, typename T>
198 {
200 }
201 
202 // scalar * matrix
203 template<int M, int N, typename T>
205 {
206  matMxN<M, N, T> R;
207  for (int i = 0; i < N; ++i) R[i] = x * A[i];
208  return R;
209 }
210 
211 // matrix * scalar
212 template<int M, int N, typename T>
214 {
215  matMxN<M, N, T> R;
216  for (int i = 0; i < N; ++i) R[i] = A[i] * x;
217  return R;
218 }
219 
220 // matrix * column-vector
221 template<int M, int N, typename T>
223 {
224  vecN<M, T> r;
225  for (int i = 0; i < N; ++i) r += A[i] * x[i];
226  return r;
227 }
228 template<typename T>
230 {
231  // Small optimization for 4x4 matrix: reassociate add-chain
232  return (A[0] * x[0] + A[1] * x[1]) + (A[2] * x[2] + A[3] * x[3]);
233 }
234 
235 // matrix * matrix
236 template<int M, int N, int O, typename T>
238 {
239  matMxN<M, O, T> R;
240  for (int i = 0; i < O; ++i) R[i] = A * B[i];
241  return R;
242 }
243 
244 // matrix transpose
245 template<int M, int N, typename T>
247 {
248  matMxN<N, M, T> R;
249  for (int i = 0; i < N; ++i) {
250  for (int j = 0; j < M; ++j) {
251  R[j][i] = A[i][j];
252  }
253  }
254  return R;
255 }
256 
257 // determinant of a 2x2 matrix
258 template<typename T>
259 inline T determinant(const matMxN<2, 2, T>& A)
260 {
261  return A[0][0] * A[1][1] - A[0][1] * A[1][0];
262 }
263 
264 // determinant of a 3x3 matrix
265 template<typename T>
266 inline T determinant(const matMxN<3, 3, T>& A)
267 {
268  return A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
269  - A[1][0] * (A[0][1] * A[2][2] - A[0][2] * A[2][1])
270  + A[2][0] * (A[0][1] * A[1][2] - A[0][2] * A[1][1]);
271 }
272 
273 // determinant of a 4x4 matrix
274 template<typename T>
275 inline T determinant(const matMxN<4, 4, T>& A)
276 {
277  // Implementation based on glm: http://glm.g-truc.net
278  T f0 = A[2][2] * A[3][3] - A[3][2] * A[2][3];
279  T f1 = A[2][1] * A[3][3] - A[3][1] * A[2][3];
280  T f2 = A[2][1] * A[3][2] - A[3][1] * A[2][2];
281  T f3 = A[2][0] * A[3][3] - A[3][0] * A[2][3];
282  T f4 = A[2][0] * A[3][2] - A[3][0] * A[2][2];
283  T f5 = A[2][0] * A[3][1] - A[3][0] * A[2][1];
284  vecN<4, T> c((A[1][1] * f0 - A[1][2] * f1 + A[1][3] * f2),
285  (A[1][2] * f3 - A[1][3] * f4 - A[1][0] * f0),
286  (A[1][0] * f1 - A[1][1] * f3 + A[1][3] * f5),
287  (A[1][1] * f4 - A[1][2] * f5 - A[1][0] * f2));
288  return dot(A[0], c);
289 }
290 
291 // inverse of a 2x2 matrix
292 template<typename T>
294 {
295  T d = T(1) / determinant(A);
296  return d * matMxN<2, 2, T>(vecN<2, T>( A[1][1], -A[0][1]),
297  vecN<2, T>(-A[1][0], A[0][0]));
298 }
299 
300 // inverse of a 3x3 matrix
301 template<typename T>
303 {
304  T d = T(1) / determinant(A);
305  return d * matMxN<3, 3, T>(
306  vecN<3, T>(A[1][1] * A[2][2] - A[1][2] * A[2][1],
307  A[0][2] * A[2][1] - A[0][1] * A[2][2],
308  A[0][1] * A[1][2] - A[0][2] * A[1][1]),
309  vecN<3, T>(A[1][2] * A[2][0] - A[1][0] * A[2][2],
310  A[0][0] * A[2][2] - A[0][2] * A[2][0],
311  A[0][2] * A[1][0] - A[0][0] * A[1][2]),
312  vecN<3, T>(A[1][0] * A[2][1] - A[1][1] * A[2][0],
313  A[0][1] * A[2][0] - A[0][0] * A[2][1],
314  A[0][0] * A[1][1] - A[0][1] * A[1][0]));
315 }
316 
317 // inverse of a 4x4 matrix
318 template<typename T>
320 {
321  // Implementation based on glm: http://glm.g-truc.net
322 
323  T c00 = A[2][2] * A[3][3] - A[3][2] * A[2][3];
324  T c02 = A[1][2] * A[3][3] - A[3][2] * A[1][3];
325  T c03 = A[1][2] * A[2][3] - A[2][2] * A[1][3];
326 
327  T c04 = A[2][1] * A[3][3] - A[3][1] * A[2][3];
328  T c06 = A[1][1] * A[3][3] - A[3][1] * A[1][3];
329  T c07 = A[1][1] * A[2][3] - A[2][1] * A[1][3];
330 
331  T c08 = A[2][1] * A[3][2] - A[3][1] * A[2][2];
332  T c10 = A[1][1] * A[3][2] - A[3][1] * A[1][2];
333  T c11 = A[1][1] * A[2][2] - A[2][1] * A[1][2];
334 
335  T c12 = A[2][0] * A[3][3] - A[3][0] * A[2][3];
336  T c14 = A[1][0] * A[3][3] - A[3][0] * A[1][3];
337  T c15 = A[1][0] * A[2][3] - A[2][0] * A[1][3];
338 
339  T c16 = A[2][0] * A[3][2] - A[3][0] * A[2][2];
340  T c18 = A[1][0] * A[3][2] - A[3][0] * A[1][2];
341  T c19 = A[1][0] * A[2][2] - A[2][0] * A[1][2];
342 
343  T c20 = A[2][0] * A[3][1] - A[3][0] * A[2][1];
344  T c22 = A[1][0] * A[3][1] - A[3][0] * A[1][1];
345  T c23 = A[1][0] * A[2][1] - A[2][0] * A[1][1];
346 
347  vecN<4, T> f0(c00, c00, c02, c03);
348  vecN<4, T> f1(c04, c04, c06, c07);
349  vecN<4, T> f2(c08, c08, c10, c11);
350  vecN<4, T> f3(c12, c12, c14, c15);
351  vecN<4, T> f4(c16, c16, c18, c19);
352  vecN<4, T> f5(c20, c20, c22, c23);
353 
354  vecN<4, T> v0(A[1][0], A[0][0], A[0][0], A[0][0]);
355  vecN<4, T> v1(A[1][1], A[0][1], A[0][1], A[0][1]);
356  vecN<4, T> v2(A[1][2], A[0][2], A[0][2], A[0][2]);
357  vecN<4, T> v3(A[1][3], A[0][3], A[0][3], A[0][3]);
358 
359  vecN<4, T> i0(v1 * f0 - v2 * f1 + v3 * f2);
360  vecN<4, T> i1(v0 * f0 - v2 * f3 + v3 * f4);
361  vecN<4, T> i2(v0 * f1 - v1 * f3 + v3 * f5);
362  vecN<4, T> i3(v0 * f2 - v1 * f4 + v2 * f5);
363 
364  vecN<4, T> sa(+1, -1, +1, -1);
365  vecN<4, T> sb(-1, +1, -1, +1);
366  matMxN<4, 4, T> inverse(i0 * sa, i1 * sb, i2 * sa, i3 * sb);
367 
368  vecN<4, T> row0(inverse[0][0], inverse[1][0], inverse[2][0], inverse[3][0]);
369  T OneOverDeterminant = static_cast<T>(1) / dot(A[0], row0);
370 
371  return inverse * OneOverDeterminant;
372 }
373 
374 // norm-2 squared
375 template<int M, int N, typename T>
376 inline T norm2_2(const matMxN<M, N, T>& A)
377 {
378  vecN<M, T> t;
379  for (int i = 0; i < N; ++i) t += A[i] * A[i];
380  return sum(t);
381 }
382 
383 // Textual representation. (Only) used to debug unittest.
384 template<int M, int N, typename T>
385 std::ostream& operator<<(std::ostream& os, const matMxN<M, N, T>& A)
386 {
387  for (int j = 0; j < M; ++j) {
388  for (int i = 0; i < N; ++i) {
389  os << A[i][j] << ' ';
390  }
391  os << '\n';
392  }
393  return os;
394 }
395 
396 } // namespace gl
397 
398 
399 // --- SSE optimizations ---
400 
401 #ifdef __SSE__
402 #include <xmmintrin.h>
403 
404 namespace gl {
405 
406 // transpose of 4x4-float matrix
407 inline mat4 transpose(const mat4& A)
408 {
409  mat4 R;
410  __m128 t0 = _mm_shuffle_ps(A[0].sse(), A[1].sse(), _MM_SHUFFLE(1,0,1,0));
411  __m128 t1 = _mm_shuffle_ps(A[0].sse(), A[1].sse(), _MM_SHUFFLE(3,2,3,2));
412  __m128 t2 = _mm_shuffle_ps(A[2].sse(), A[3].sse(), _MM_SHUFFLE(1,0,1,0));
413  __m128 t3 = _mm_shuffle_ps(A[2].sse(), A[3].sse(), _MM_SHUFFLE(3,2,3,2));
414  R[0] = vec4(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,0,2,0)));
415  R[1] = vec4(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,1,3,1)));
416  R[2] = vec4(_mm_shuffle_ps(t1, t3, _MM_SHUFFLE(2,0,2,0)));
417  R[3] = vec4(_mm_shuffle_ps(t1, t3, _MM_SHUFFLE(3,1,3,1)));
418  return R;
419 }
420 
421 // inverse of a 4x4-float matrix
422 inline mat4 inverse(const mat4& A)
423 {
424  // Implementation based on glm: http://glm.g-truc.net
425 
426  __m128 sa0 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(3,3,3,3));
427  __m128 sb0 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(2,2,2,2));
428  __m128 s00 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(2,2,2,2));
429  __m128 s10 = _mm_shuffle_ps(sa0, sa0, _MM_SHUFFLE(2,0,0,0));
430  __m128 s20 = _mm_shuffle_ps(sb0, sb0, _MM_SHUFFLE(2,0,0,0));
431  __m128 s30 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(3,3,3,3));
432  __m128 f0 = _mm_sub_ps(_mm_mul_ps(s00, s10), _mm_mul_ps(s20, s30));
433 
434  __m128 sa1 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(3,3,3,3));
435  __m128 sb1 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(1,1,1,1));
436  __m128 s01 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(1,1,1,1));
437  __m128 s11 = _mm_shuffle_ps(sa1, sa1, _MM_SHUFFLE(2,0,0,0));
438  __m128 s21 = _mm_shuffle_ps(sb1, sb1, _MM_SHUFFLE(2,0,0,0));
439  __m128 s31 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(3,3,3,3));
440  __m128 f1 = _mm_sub_ps(_mm_mul_ps(s01, s11), _mm_mul_ps(s21, s31));
441 
442  __m128 sa2 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(2,2,2,2));
443  __m128 sb2 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(1,1,1,1));
444  __m128 s02 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(1,1,1,1));
445  __m128 s12 = _mm_shuffle_ps(sa2, sa2, _MM_SHUFFLE(2,0,0,0));
446  __m128 s22 = _mm_shuffle_ps(sb2, sb2, _MM_SHUFFLE(2,0,0,0));
447  __m128 s32 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(2,2,2,2));
448  __m128 f2 = _mm_sub_ps(_mm_mul_ps(s02, s12), _mm_mul_ps(s22, s32));
449 
450  __m128 sa3 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(3,3,3,3));
451  __m128 sb3 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(0,0,0,0));
452  __m128 s03 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(0,0,0,0));
453  __m128 s13 = _mm_shuffle_ps(sa3, sa3, _MM_SHUFFLE(2,0,0,0));
454  __m128 s23 = _mm_shuffle_ps(sb3, sb3, _MM_SHUFFLE(2,0,0,0));
455  __m128 s33 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(3,3,3,3));
456  __m128 f3 = _mm_sub_ps(_mm_mul_ps(s03, s13), _mm_mul_ps(s23, s33));
457 
458  __m128 sa4 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(2,2,2,2));
459  __m128 sb4 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(0,0,0,0));
460  __m128 s04 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(0,0,0,0));
461  __m128 s14 = _mm_shuffle_ps(sa4, sa4, _MM_SHUFFLE(2,0,0,0));
462  __m128 s24 = _mm_shuffle_ps(sb4, sb4, _MM_SHUFFLE(2,0,0,0));
463  __m128 s34 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(2,2,2,2));
464  __m128 f4 = _mm_sub_ps(_mm_mul_ps(s04, s14), _mm_mul_ps(s24, s34));
465 
466  __m128 sa5 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(1,1,1,1));
467  __m128 sb5 = _mm_shuffle_ps(A[3].sse(), A[2].sse(), _MM_SHUFFLE(0,0,0,0));
468  __m128 s05 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(0,0,0,0));
469  __m128 s15 = _mm_shuffle_ps(sa5, sa5, _MM_SHUFFLE(2,0,0,0));
470  __m128 s25 = _mm_shuffle_ps(sb5, sb5, _MM_SHUFFLE(2,0,0,0));
471  __m128 s35 = _mm_shuffle_ps(A[2].sse(), A[1].sse(), _MM_SHUFFLE(1,1,1,1));
472  __m128 f5 = _mm_sub_ps(_mm_mul_ps(s05, s15), _mm_mul_ps(s25, s35));
473 
474  __m128 t0 = _mm_shuffle_ps(A[1].sse(), A[0].sse(), _MM_SHUFFLE(0,0,0,0));
475  __m128 t1 = _mm_shuffle_ps(A[1].sse(), A[0].sse(), _MM_SHUFFLE(1,1,1,1));
476  __m128 t2 = _mm_shuffle_ps(A[1].sse(), A[0].sse(), _MM_SHUFFLE(2,2,2,2));
477  __m128 t3 = _mm_shuffle_ps(A[1].sse(), A[0].sse(), _MM_SHUFFLE(3,3,3,3));
478  __m128 v0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2,2,2,0));
479  __m128 v1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,2,2,0));
480  __m128 v2 = _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,2,2,0));
481  __m128 v3 = _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,2,0));
482 
483  __m128 s0 = _mm_sub_ps(_mm_mul_ps(v1, f0), _mm_mul_ps(v2, f1));
484  __m128 s1 = _mm_sub_ps(_mm_mul_ps(v0, f0), _mm_mul_ps(v2, f3));
485  __m128 s2 = _mm_sub_ps(_mm_mul_ps(v0, f1), _mm_mul_ps(v1, f3));
486  __m128 s3 = _mm_sub_ps(_mm_mul_ps(v0, f2), _mm_mul_ps(v1, f4));
487  __m128 sa = _mm_set_ps( 1.0f,-1.0f, 1.0f,-1.0f);
488  __m128 sb = _mm_set_ps(-1.0f, 1.0f,-1.0f, 1.0f);
489  __m128 i0 = _mm_mul_ps(sb, _mm_add_ps(s0, _mm_mul_ps(v3, f2)));
490  __m128 i1 = _mm_mul_ps(sa, _mm_add_ps(s1, _mm_mul_ps(v3, f4)));
491  __m128 i2 = _mm_mul_ps(sb, _mm_add_ps(s2, _mm_mul_ps(v3, f5)));
492  __m128 i3 = _mm_mul_ps(sa, _mm_add_ps(s3, _mm_mul_ps(v2, f5)));
493 
494  __m128 ra = _mm_shuffle_ps(i0, i1, _MM_SHUFFLE(0,0,0,0));
495  __m128 rb = _mm_shuffle_ps(i2, i3, _MM_SHUFFLE(0,0,0,0));
496  vec4 row0 = vec4(_mm_shuffle_ps(ra, rb, _MM_SHUFFLE(2,0,2,0)));
497 
498  vec4 rd = recip(dot_broadcast(A[0], row0));
499  return mat4(vec4(i0) * rd, vec4(i1) * rd, vec4(i2) * rd, vec4(i3) * rd);
500 }
501 
502 } // namespace gl
503 
504 #endif // __SSE__
505 
506 #endif
matMxN(const matMxN< M2, N2, T > &x)
Definition: gl_mat.hh:56
bool operator==(const matMxN< M, N, T > &A, const matMxN< M, N, T > &B)
Definition: gl_mat.hh:166
matMxN & operator*=(T x)
Definition: gl_mat.hh:104
vecN< 4, T > & operator[](int i)
Definition: gl_mat.hh:143
matMxN()
Definition: gl_mat.hh:36
matMxN< 4, 4, float > mat4
Definition: gl_mat.hh:159
T norm2_2(const matMxN< M, N, T > &A)
Definition: gl_mat.hh:376
T sum(const vecN< N, T > &x)
Definition: gl_vec.hh:310
matMxN & operator+=(const matMxN &x)
Definition: gl_mat.hh:102
matMxN(const vecN< M, T > &x, const vecN< M, T > &y, const vecN< M, T > &z)
Definition: gl_mat.hh:71
matMxN< M, N, T > operator+(const matMxN< M, N, T > &A, const matMxN< M, N, T > &B)
Definition: gl_mat.hh:179
const vecN< M, T > & operator[](unsigned i) const
Definition: gl_mat.hh:88
matMxN & operator+=(const matMxN &x)
Definition: gl_mat.hh:146
vecN< N, T > recip(const vecN< N, T > &x)
Definition: gl_vec.hh:239
#define O(a)
Definition: YM2151.cc:137
matMxN & operator*=(T x)
Definition: gl_mat.hh:148
matMxN< M, N, T > operator-(const matMxN< M, N, T > &A, const matMxN< M, N, T > &B)
Definition: gl_mat.hh:188
vecN< 4, float > vec4
Definition: gl_vec.hh:141
T determinant(const matMxN< 2, 2, T > &A)
Definition: gl_mat.hh:259
matMxN(vecN< 4, T > x, vecN< 4, T > y, vecN< 4, T > z, vecN< 4, T > w)
Definition: gl_mat.hh:137
vecN< 4, T > operator[](int i) const
Definition: gl_mat.hh:142
matMxN(const vecN< M, T > &x, const vecN< M, T > &y, const vecN< M, T > &z, const vecN< M, T > &w)
Definition: gl_mat.hh:78
matMxN(const vecN< M, T > &x, const vecN< M, T > &y)
Definition: gl_mat.hh:64
matMxN(const vecN<(M< N ? M :N), T > &d)
Definition: gl_mat.hh:46
vecN< M, T > & operator[](unsigned i)
Definition: gl_mat.hh:94
matMxN & operator-=(const matMxN &x)
Definition: gl_mat.hh:103
T dot(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:324
imat3 i3(ivec3(1, 2, 3), ivec3(4, 5, 6), ivec3(7, 8, 9))
matMxN & operator*=(const matMxN< N, N, T > &x)
Definition: gl_mat.hh:105
matMxN< 2, 2, T > inverse(const matMxN< 2, 2, T > &A)
Definition: gl_mat.hh:293
matMxN & operator*=(const matMxN &x)
Definition: gl_mat.hh:149
matMxN< M, N, T > operator*(T x, const matMxN< M, N, T > &A)
Definition: gl_mat.hh:204
vecN< N, T > dot_broadcast(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:329
matMxN< N, M, T > transpose(const matMxN< M, N, T > &A)
Definition: gl_mat.hh:246
matMxN(vecN< 4, T > d)
Definition: gl_mat.hh:128
bool operator!=(const matMxN< M, N, T > &A, const matMxN< M, N, T > &B)
Definition: gl_mat.hh:172
TclObject t
Definition: gl_mat.hh:24
matMxN & operator-=(const matMxN &x)
Definition: gl_mat.hh:147