openMSX
gl_vec.hh
Go to the documentation of this file.
1 #ifndef GL_VEC_HH
2 #define GL_VEC_HH
3 
4 // This code implements a mathematical vector, comparable in functionality
5 // and syntax to the vector types in GLSL.
6 //
7 // Only vector sizes 2, 3 and 4 are supported. Though when it doesn't
8 // complicate stuff the code was written to support any size.
9 //
10 // Most basic functionality is already there, but this is not meant to be a
11 // full set of GLSL functions. We can always extend the functionality if/when
12 // required.
13 //
14 // Only the 'common' operations on vec4 (4 float values) are 'well' optimized.
15 // I verified that they can be used to built a 4x4 matrix multiplication
16 // routine that is as efficient as a hand-written SSE assembly routine
17 // (verified with gcc-4.8 on x86_64). Actually the other routines are likely
18 // efficient as well, but I mean it wasn't the main goal for this code.
19 //
20 // Calling convention: vec4 internally takes up one 128-bit xmm register. It
21 // can be efficientlt passed by value. The other vector types are passed by
22 // const-reference. Though also in the latter case usually the compiler can
23 // optimize away the indirection.
24 
25 #include "Math.hh"
26 #include <algorithm>
27 #include <cassert>
28 #include <cmath>
29 #include <iostream>
30 
31 namespace gl {
32 
33 // Vector with N components of type T.
34 template<int N, typename T> class vecN
35 {
36 public:
37  // Default copy-constructor and assignment operator.
38 
39  // Construct vector containing all zeros.
40  vecN()
41  {
42  for (int i = 0; i < N; ++i) e[i] = T(0);
43  }
44 
45  // Construct vector containing the same value repeated N times.
46  explicit vecN(T x)
47  {
48  for (int i = 0; i < N; ++i) e[i] = x;
49  }
50 
51  // Conversion constructor from vector of same size but different type
52  template<typename T2>
53  explicit vecN(const vecN<N, T2>& x)
54  {
55  for (int i = 0; i < N; ++i) e[i] = T(x[i]);
56  }
57 
58  // Construct from larger vector (higher order elements are dropped).
59  template<int N2> explicit vecN(const vecN<N2, T>& x)
60  {
61  static_assert(N2 > N, "wrong vector length in constructor");
62  for (int i = 0; i < N; ++i) e[i] = x[i];
63  }
64 
65  // Construct vector from 2 given values (only valid when N == 2).
66  vecN(T x, T y)
67  {
68  static_assert(N == 2, "wrong #constructor arguments");
69  e[0] = x; e[1] = y;
70  }
71 
72  // Construct vector from 3 given values (only valid when N == 3).
73  vecN(T x, T y, T z)
74  {
75  static_assert(N == 3, "wrong #constructor arguments");
76  e[0] = x; e[1] = y; e[2] = z;
77  }
78 
79  // Construct vector from 4 given values (only valid when N == 4).
80  vecN(T x, T y, T z, T w)
81  {
82  static_assert(N == 4, "wrong #constructor arguments");
83  e[0] = x; e[1] = y; e[2] = z; e[3] = w;
84  }
85 
86  // Construct vector from concatenating a scalar and a (smaller) vector.
87  template<int N2>
88  vecN(T x, const vecN<N2, T>& y)
89  {
90  static_assert((1 + N2) == N, "wrong vector length in constructor");
91  e[0] = x;
92  for (int i = 0; i < N2; ++i) e[i + 1] = y[i];
93  }
94 
95  // Construct vector from concatenating a (smaller) vector and a scalar.
96  template<int N1>
97  vecN(const vecN<N1, T>& x, T y)
98  {
99  static_assert((N1 + 1) == N, "wrong vector length in constructor");
100  for (int i = 0; i < N1; ++i) e[i] = x[i];
101  e[N1] = y;
102  }
103 
104  // Construct vector from concatenating two (smaller) vectors.
105  template<int N1, int N2>
106  vecN(const vecN<N1, T>& x, const vecN<N2, T>& y)
107  {
108  static_assert((N1 + N2) == N, "wrong vector length in constructor");
109  for (int i = 0; i < N1; ++i) e[i ] = x[i];
110  for (int i = 0; i < N2; ++i) e[i + N1] = y[i];
111  }
112 
113  // Access the i-th element of this vector.
114  T operator[](unsigned i) const {
115  #ifdef DEBUG
116  assert(i < N);
117  #endif
118  return e[i];
119  }
120  T& operator[](unsigned i) {
121  #ifdef DEBUG
122  assert(i < N);
123  #endif
124  return e[i];
125  }
126 
127  // Assignment version of the +,-,* operations defined below.
128  vecN& operator+=(const vecN& x) { *this = *this + x; return *this; }
129  vecN& operator-=(const vecN& x) { *this = *this - x; return *this; }
130  vecN& operator*=(const vecN& x) { *this = *this * x; return *this; }
131  vecN& operator*=(T x) { *this = *this * x; return *this; }
132 
133 private:
134  T e[N];
135 };
136 
137 
138 // Convenience typedefs (same names as used by GLSL).
145 
146 
147 // -- Scalar functions --
148 
149 // reciprocal square root
150 inline float rsqrt(float x)
151 {
152  return 1.0f / sqrtf(x);
153 }
154 inline double rsqrt(double x)
155 {
156  return 1.0 / sqrt(x);
157 }
158 
159 // convert radians <-> degrees
160 template<typename T> inline T radians(T d)
161 {
162  return d * T(M_PI / 180.0);
163 }
164 template<typename T> inline T degrees(T r)
165 {
166  return r * T(180.0 / M_PI);
167 }
168 
169 
170 // -- Vector functions --
171 
172 // vector equality / inequality
173 template<int N, typename T>
174 inline bool operator==(const vecN<N, T>& x, const vecN<N, T>& y)
175 {
176  for (int i = 0; i < N; ++i) if (x[i] != y[i]) return false;
177  return true;
178 }
179 template<int N, typename T>
180 inline bool operator!=(const vecN<N, T>& x, const vecN<N, T>& y)
181 {
182  return !(x == y);
183 }
184 
185 // vector negation
186 template<int N, typename T>
188 {
189  return vecN<N, T>() - x;
190 }
191 
192 // vector + vector
193 template<int N, typename T>
194 inline vecN<N, T> operator+(const vecN<N, T>& x, const vecN<N, T>& y)
195 {
196  vecN<N, T> r;
197  for (int i = 0; i < N; ++i) r[i] = x[i] + y[i];
198  return r;
199 }
200 
201 // vector - vector
202 template<int N, typename T>
203 inline vecN<N, T> operator-(const vecN<N, T>& x, const vecN<N, T>& y)
204 {
205  vecN<N, T> r;
206  for (int i = 0; i < N; ++i) r[i] = x[i] - y[i];
207  return r;
208 }
209 
210 // scalar * vector
211 template<int N, typename T>
212 inline vecN<N, T> operator*(T x, const vecN<N, T>& y)
213 {
214  vecN<N, T> r;
215  for (int i = 0; i < N; ++i) r[i] = x * y[i];
216  return r;
217 }
218 
219 // vector * scalar
220 template<int N, typename T>
221 inline vecN<N, T> operator*(const vecN<N, T>& x, T y)
222 {
223  vecN<N, T> r;
224  for (int i = 0; i < N; ++i) r[i] = x[i] * y;
225  return r;
226 }
227 
228 // vector * vector
229 template<int N, typename T>
230 inline vecN<N, T> operator*(const vecN<N, T>& x, const vecN<N, T>& y)
231 {
232  vecN<N, T> r;
233  for (int i = 0; i < N; ++i) r[i] = x[i] * y[i];
234  return r;
235 }
236 
237 // element-wise reciprocal
238 template<int N, typename T>
239 inline vecN<N, T> recip(const vecN<N, T>& x)
240 {
241  vecN<N, T> r;
242  for (int i = 0; i < N; ++i) r[i] = T(1) / x[i];
243  return r;
244 }
245 
246 // scalar / vector
247 template<int N, typename T>
248 inline vecN<N, T> operator/(T x, const vecN<N, T>& y)
249 {
250  return x * recip(y);
251 }
252 
253 // vector / scalar
254 template<int N, typename T>
255 inline vecN<N, T> operator/(const vecN<N, T>& x, T y)
256 {
257  return x * (T(1) / y);
258 }
259 
260 // vector / vector
261 template<int N, typename T>
262 inline vecN<N, T> operator/(const vecN<N, T>& x, const vecN<N, T>& y)
263 {
264  return x * recip(y);
265 }
266 
267 // min(vector, vector)
268 template<int N, typename T>
269 inline vecN<N, T> min(const vecN<N, T>& x, const vecN<N, T>& y)
270 {
271  vecN<N, T> r;
272  for (int i = 0; i < N; ++i) r[i] = std::min(x[i], y[i]);
273  return r;
274 }
275 
276 // min(vector, vector)
277 template<int N, typename T>
278 inline T min_component(const vecN<N, T>& x)
279 {
280  T r = x[0];
281  for (int i = 1; i < N; ++i) r = std::min(r, x[i]);
282  return r;
283 }
284 
285 // max(vector, vector)
286 template<int N, typename T>
287 inline vecN<N, T> max(const vecN<N, T>& x, const vecN<N, T>& y)
288 {
289  vecN<N, T> r;
290  for (int i = 0; i < N; ++i) r[i] = std::max(x[i], y[i]);
291  return r;
292 }
293 
294 // clamp(vector, vector, vector)
295 template<int N, typename T>
296 inline vecN<N, T> clamp(const vecN<N, T>& x, const vecN<N, T>& minVal, const vecN<N, T>& maxVal)
297 {
298  return min(maxVal, max(minVal, x));
299 }
300 
301 // clamp(vector, scalar, scalar)
302 template<int N, typename T>
303 inline vecN<N, T> clamp(const vecN<N, T>& x, T minVal, T maxVal)
304 {
305  return clamp(x, vecN<N, T>(minVal), vecN<N, T>(maxVal));
306 }
307 
308 // sum of components
309 template<int N, typename T>
310 inline T sum(const vecN<N, T>& x)
311 {
312  T result(0);
313  for (int i = 0; i < N; ++i) result += x[i];
314  return result;
315 }
316 template<int N, typename T>
318 {
319  return vecN<N, T>(sum(x));
320 }
321 
322 // dot product
323 template<int N, typename T>
324 inline T dot(const vecN<N, T>& x, const vecN<N, T>& y)
325 {
326  return sum(x * y);
327 }
328 template<int N, typename T>
329 inline vecN<N, T> dot_broadcast(const vecN<N, T>& x, const vecN<N, T>& y)
330 {
331  return sum_broadcast(x * y);
332 }
333 
334 // squared length (norm-2)
335 template<int N, typename T>
336 inline T length2(const vecN<N, T>& x)
337 {
338  return dot(x, x);
339 }
340 
341 // length (norm-2)
342 template<int N, typename T>
343 inline T length(const vecN<N, T>& x)
344 {
345  return sqrt(length2(x));
346 }
347 
348 // normalize vector
349 template<int N, typename T>
351 {
352  return x * rsqrt(length2(x));
353 }
354 
355 // cross product (only defined for vectors of length 3)
356 template<typename T>
357 inline vecN<3, T> cross(const vecN<3, T>& x, const vecN<3, T>& y)
358 {
359  return vecN<3, T>(x[1] * y[2] - x[2] * y[1],
360  x[2] * y[0] - x[0] * y[2],
361  x[0] * y[1] - x[1] * y[0]);
362 }
363 
364 // round each component to the nearest integer (returns a vector of integers)
365 template<int N, typename T>
366 inline vecN<N, int> round(const vecN<N, T>& x)
367 {
368  vecN<N, int> r;
369  // note: std::lrint() is more generic (e.g. also works with double),
370  // but Dingux doesn't seem to have std::lrint().
371  for (int i = 0; i < N; ++i) r[i] = lrintf(x[i]);
372  return r;
373 }
374 
375 // truncate each component to the nearest integer that is not bigger in
376 // absolute value (returns a vector of integers)
377 template<int N, typename T>
378 inline vecN<N, int> trunc(const vecN<N, T>& x)
379 {
380  vecN<N, int> r;
381  for (int i = 0; i < N; ++i) r[i] = int(x[i]);
382  return r;
383 }
384 
385 // Textual representation. (Only) used to debug unittest.
386 template<int N, typename T>
387 std::ostream& operator<<(std::ostream& os, const vecN<N, T>& x)
388 {
389  os << "[ ";
390  for (int i = 0; i < N; ++i) {
391  os << x[i] << ' ';
392  }
393  os << ']';
394  return os;
395 }
396 
397 } // namespace gl
398 
399 
400 // --- SSE optimizations ---
401 
402 #ifdef __SSE__
403 #include <xmmintrin.h>
404 
405 // Optionally also use SSE3 and SSE4.1
406 #ifdef __SSE3__
407 #include <pmmintrin.h>
408 #endif
409 #ifdef __SSE4_1__
410 #include <smmintrin.h>
411 #endif
412 
413 namespace gl {
414 
415 // Specialization: implement a vector of 4 floats using SSE instructions
416 template<> class vecN<4, float>
417 {
418 public:
419  vecN()
420  {
421  e = _mm_setzero_ps();
422  }
423 
424  explicit vecN(float x)
425  {
426  e = _mm_set1_ps(x);
427  }
428 
429  vecN(float x, const vecN<3, float>& yzw)
430  {
431  e = _mm_setr_ps(x, yzw[0], yzw[1], yzw[2]);
432  }
433 
434  vecN(const vecN<3, float>& xyz, float w)
435  {
436  e = _mm_setr_ps(xyz[0], xyz[1], xyz[2], w);
437  }
438 
439  vecN(const vecN<2, float>& xy, const vecN<2, float>& zw)
440  {
441  e = _mm_setr_ps(xy[0], xy[1], zw[0], zw[1]);
442  }
443 
444  vecN(float x, float y, float z, float w)
445  {
446  e = _mm_setr_ps(x, y, z, w);
447  }
448 
449  float operator[](int i) const { return e_[i]; }
450  float& operator[](int i) { return e_[i]; }
451 
452  vecN& operator+=(vecN x) { *this = *this + x; ; return *this; }
453  vecN& operator-=(vecN x) { *this = *this - x; ; return *this; }
454  vecN& operator*=(vecN x) { *this = *this * x; ; return *this; }
455  vecN& operator*=(float x) { *this = *this * x; ; return *this; }
456 
457  explicit vecN(__m128 x) : e(x) {}
458  __m128 sse() const { return e; }
459 
460 private:
461  // With gcc we don't need this union. With clang we need it to
462  // be able to write individual components.
463  union {
464  __m128 e;
465  float e_[4];
466  };
467 };
468 
469 inline bool operator==(vec4 x, vec4 y)
470 {
471  return _mm_movemask_ps(_mm_cmpeq_ps(x.sse(), y.sse())) == 15;
472 }
473 
474 inline vec4 operator+(vec4 x, vec4 y)
475 {
476  return vec4(_mm_add_ps(x.sse(), y.sse()));
477 }
478 
479 inline vec4 operator-(vec4 x, vec4 y)
480 {
481  return vec4(_mm_sub_ps(x.sse(), y.sse()));
482 }
483 
484 inline vec4 operator*(float x, vec4 y)
485 {
486  return vec4(_mm_mul_ps(_mm_set1_ps(x), y.sse()));
487 }
488 
489 inline vec4 operator*(vec4 x, float y)
490 {
491  return vec4(_mm_mul_ps(x.sse(), _mm_set1_ps(y)));
492 }
493 
494 inline vec4 operator*(vec4 x, vec4 y)
495 {
496  return vec4(_mm_mul_ps(x.sse(), y.sse()));
497 }
498 
499 #ifdef __SSE3__
500 inline float sum(vec4 x)
501 {
502  __m128 t = _mm_hadd_ps(x.sse(), x.sse());
503  return _mm_cvtss_f32(_mm_hadd_ps(t, t));
504 }
505 inline vec4 sum_broadcast(vec4 x)
506 {
507  __m128 t = _mm_hadd_ps(x.sse(), x.sse());
508  return vec4(_mm_hadd_ps(t, t));
509 }
510 #else
511 inline float sum(vec4 x)
512 {
513  __m128 t0 = x.sse();
514  __m128 t1 = _mm_add_ps(t0, _mm_movehl_ps (t0, t0));
515  __m128 t2 = _mm_add_ps(t1, _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1,1,1,1)));
516  return _mm_cvtss_f32(t2);
517 }
518 inline vec4 sum_broadcast(vec4 x)
519 {
520  __m128 t0 = x.sse();
521  __m128 t1 = _mm_add_ps(t0, _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2,3,0,1)));
522  __m128 t2 = _mm_add_ps(t1, _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1,0,3,2)));
523  return vec4(t2);
524 }
525 #endif
526 
527 inline vec4 min(vec4 x, vec4 y)
528 {
529  return vec4(_mm_min_ps(x.sse(), y.sse()));
530 }
531 
532 inline vec4 max(vec4 x, vec4 y)
533 {
534  return vec4(_mm_max_ps(x.sse(), y.sse()));
535 }
536 
537 #ifdef __SSE4_1__
538 inline float dot(vec4 x, vec4 y)
539 {
540  return _mm_cvtss_f32(_mm_dp_ps(x.sse(), y.sse(), 0xF1));
541 }
542 inline vec4 dot_broadcast(vec4 x, vec4 y)
543 {
544  return vec4(_mm_dp_ps(x.sse(), y.sse(), 0xFF));
545 }
546 #endif
547 
548 inline vec4 normalize(vec4 x)
549 {
550  // Use 1 Newton-Raphson step to improve 1/sqrt(a) approximation:
551  // let s0 be the initial approximation, then
552  // s1 = (3 - s0^2 * a) * (s0 / 2) is a better approximation
553  vec4 l2 = dot_broadcast(x, x);
554  __m128 s0 = _mm_rsqrt_ps(l2.sse());
555  __m128 ss = _mm_mul_ps(s0, s0);
556  __m128 h = _mm_mul_ps(_mm_set1_ps(-0.5f), s0);
557  __m128 m3 = _mm_sub_ps(_mm_mul_ps(l2.sse(), ss), _mm_set1_ps(3.0f));
558  __m128 s1 = _mm_mul_ps(h, m3);
559  return vec4(_mm_mul_ps(x.sse(), s1));
560 }
561 
562 inline vec4 recip(vec4 a)
563 {
564  // Use 1 Newton-Raphson step to improve 1/a approximation:
565  // let x0 be the initial approximation, then
566  // x1 = x0 + x0 - a * x0 * x0 is a better approximation
567  __m128 x0 = _mm_rcp_ps(a.sse());
568  __m128 m0 = _mm_mul_ps(x0, a.sse());
569  __m128 m1 = _mm_mul_ps(x0, m0);
570  __m128 a0 = _mm_add_ps(x0, x0);
571  __m128 x1 = _mm_sub_ps(a0, m1);
572  return vec4(x1);
573 }
574 
575 } // namespace gl
576 
577 #endif // __SSE__
578 
579 #endif // GL_VEC_HH
T min_component(const vecN< N, T > &x)
Definition: gl_vec.hh:278
T length(const vecN< N, T > &x)
Definition: gl_vec.hh:343
bool operator==(const matMxN< M, N, T > &A, const matMxN< M, N, T > &B)
Definition: gl_mat.hh:166
float rsqrt(float x)
Definition: gl_vec.hh:150
T length2(const vecN< N, T > &x)
Definition: gl_vec.hh:336
vecN & operator*=(const vecN &x)
Definition: gl_vec.hh:130
vecN< N, T > min(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:269
vecN(T x, T y)
Definition: gl_vec.hh:66
vecN(const vecN< N1, T > &x, T y)
Definition: gl_vec.hh:97
#define M_PI
Definition: Math.hh:26
vecN(T x)
Definition: gl_vec.hh:46
vecN< N, int > trunc(const vecN< N, T > &x)
Definition: gl_vec.hh:378
vecN< N, T > normalize(const vecN< N, T > &x)
Definition: gl_vec.hh:350
vecN(const vecN< N2, T > &x)
Definition: gl_vec.hh:59
T degrees(T r)
Definition: gl_vec.hh:164
T radians(T d)
Definition: gl_vec.hh:160
vecN< N, T > max(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:287
vecN(T x, T y, T z, T w)
Definition: gl_vec.hh:80
T sum(const vecN< N, T > &x)
Definition: gl_vec.hh:310
vecN< 3, T > cross(const vecN< 3, T > &x, const vecN< 3, T > &y)
Definition: gl_vec.hh:357
matMxN< M, N, T > operator+(const matMxN< M, N, T > &A, const matMxN< M, N, T > &B)
Definition: gl_mat.hh:179
vecN(T x, T y, T z)
Definition: gl_vec.hh:73
vecN< N, T > recip(const vecN< N, T > &x)
Definition: gl_vec.hh:239
vecN< N, int > round(const vecN< N, T > &x)
Definition: gl_vec.hh:366
matMxN< M, N, T > operator-(const matMxN< M, N, T > &A, const matMxN< M, N, T > &B)
Definition: gl_mat.hh:188
vecN< 4, float > vec4
Definition: gl_vec.hh:141
vecN(T x, const vecN< N2, T > &y)
Definition: gl_vec.hh:88
vecN & operator-=(const vecN &x)
Definition: gl_vec.hh:129
T operator[](unsigned i) const
Definition: gl_vec.hh:114
T dot(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:324
vecN & operator+=(const vecN &x)
Definition: gl_vec.hh:128
vecN< N, T > clamp(const vecN< N, T > &x, const vecN< N, T > &minVal, const vecN< N, T > &maxVal)
Definition: gl_vec.hh:296
matMxN< M, N, T > operator*(T x, const matMxN< M, N, T > &A)
Definition: gl_mat.hh:204
vecN< N, T > operator/(T x, const vecN< N, T > &y)
Definition: gl_vec.hh:248
vecN< N, T > dot_broadcast(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:329
vecN(const vecN< N1, T > &x, const vecN< N2, T > &y)
Definition: gl_vec.hh:106
T & operator[](unsigned i)
Definition: gl_vec.hh:120
vecN(const vecN< N, T2 > &x)
Definition: gl_vec.hh:53
bool operator!=(const matMxN< M, N, T > &A, const matMxN< M, N, T > &B)
Definition: gl_mat.hh:172
TclObject t
Definition: gl_mat.hh:24
vecN< N, T > sum_broadcast(const vecN< N, T > &x)
Definition: gl_vec.hh:317
vecN & operator*=(T x)
Definition: gl_vec.hh:131
vecN()
Definition: gl_vec.hh:40