Mitsuba Renderer  0.5.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
triaccel_sse.h
Go to the documentation of this file.
1 /*
2  This file is part of Mitsuba, a physically based rendering system.
3 
4  Copyright (c) 2007-2014 by Wenzel Jakob and others.
5 
6  Mitsuba is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License Version 3
8  as published by the Free Software Foundation.
9 
10  Mitsuba is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #pragma once
20 #if !defined(__MITSUBA_RENDER_TRIACCEL_SSE_H_)
21 #define __MITSUBA_RENDER_TRIACCEL_SSE_H_
22 
23 #include <mitsuba/render/trimesh.h>
24 
26 
27 FINLINE __m128 rayIntersectPacket(const TriAccel &tri, const RayPacket4 &packet,
28  __m128 mint, __m128 maxt, __m128 inactive, Intersection4 &its) {
29  static const MM_ALIGN16 int waldModulo[4] = { 1, 2, 0, 1 };
30  const int ku = waldModulo[tri.k], kv = waldModulo[tri.k+1];
31 
32  /* Get the u and v components */
33  const __m128
34  o_u = packet.o[ku].ps, o_v = packet.o[kv].ps, o_k = packet.o[tri.k].ps,
35  d_u = packet.d[ku].ps, d_v = packet.d[kv].ps, d_k = packet.d[tri.k].ps;
36 
37  /* Extract data from the first cache line */
38  const __m128
39  line1 = _mm_load_ps((const float *) &tri),
40  n_u = splat_ps(line1, 1),
41  n_v = splat_ps(line1, 2),
42  n_d = splat_ps(line1, 3);
43 
44  const __m128
45  ounu = _mm_mul_ps(o_u, n_u),
46  ovnv = _mm_mul_ps(o_v, n_v),
47  dunu = _mm_mul_ps(d_u, n_u),
48  dvnv = _mm_mul_ps(d_v, n_v);
49 
50  /* Calculate the plane intersection (Typo in the thesis?) */
51  const __m128
52  num = _mm_sub_ps(_mm_sub_ps(_mm_sub_ps(n_d, ounu), ovnv), o_k),
53  denom = _mm_add_ps(_mm_add_ps(dunu, dvnv), d_k);
54 
55  const __m128
56  t = _mm_div_ps(num, denom);
57 
58  __m128 hasIts =
59  _mm_andnot_ps(inactive, _mm_and_ps(_mm_cmpgt_ps(maxt, t), _mm_cmpgt_ps(t, mint)));
60 
61  if (_mm_movemask_ps(hasIts) == 0)
62  return hasIts;
63 
64  /* Extract data from the second cache line */
65  const __m128
66  line2 = _mm_load_ps(&tri.a_u),
67  a_u = splat_ps(line2, 0),
68  a_v = splat_ps(line2, 1),
69  b_nu = splat_ps(line2, 2),
70  b_nv = splat_ps(line2, 3);
71 
72  const __m128
73  hu = _mm_add_ps(o_u, _mm_sub_ps(_mm_mul_ps(t, d_u), a_u)),
74  hv = _mm_add_ps(o_v, _mm_sub_ps(_mm_mul_ps(t, d_v), a_v));
75 
76  /* Extract data from the third cache line */
77  const __m128
78  line3 = _mm_load_ps(&tri.c_nu),
79  c_nu = splat_ps(line3, 0),
80  c_nv = splat_ps(line3, 1);
81  const __m128i
82  primIndex = splat_epi32(pstoepi32(line3), 3),
83  shapeIndex = splat_epi32(pstoepi32(line3), 2);
84 
85  const __m128
86  u = _mm_add_ps(_mm_mul_ps(hv, b_nu), _mm_mul_ps(hu, b_nv)),
87  v = _mm_add_ps(_mm_mul_ps(hu, c_nu), _mm_mul_ps(hv, c_nv));
88 
89  const __m128
90  zero = _mm_setzero_ps(),
91  term1 = _mm_cmpge_ps(u, zero),
92  term2 = _mm_cmpge_ps(v, zero),
93  term3 = _mm_add_ps(u, v);
94 
95  const __m128
96  term4 = _mm_and_ps(term1, term2),
97  term5 = _mm_cmpge_ps(SSEConstants::one.ps, term3);
98 
99  hasIts = _mm_and_ps(hasIts, _mm_and_ps(term4, term5));
100 
101  if (_mm_movemask_ps(hasIts) == 0)
102  return hasIts;
103 
104  its.t.ps = mux_ps(hasIts, t, its.t.ps);
105  its.u.ps = mux_ps(hasIts, u, its.u.ps);
106  its.v.ps = mux_ps(hasIts, v, its.v.ps);
107  its.primIndex.pi = mux_epi32(pstoepi32(hasIts), primIndex, its.primIndex.pi);
108  its.shapeIndex.pi = mux_epi32(pstoepi32(hasIts), shapeIndex, its.shapeIndex.pi);
109 
110  return hasIts;
111 }
112 
114 
115 #endif /* __MITSUBA_RENDER_TRIACCEL_SSE_H_ */
116 
FINLINE __m128 rayIntersectPacket(const TriAccel &tri, const RayPacket4 &packet, __m128 mint, __m128 maxt, __m128 inactive, Intersection4 &its)
Definition: triaccel_sse.h:27
SSEVector t
Definition: ray_sse.h:75
SSEVector u
Definition: ray_sse.h:76
#define MTS_NAMESPACE_BEGIN
Definition: platform.h:137
Float a_u
Definition: triaccel.h:43
SSEVector v
Definition: ray_sse.h:77
SIMD quad-packed ray for coherent ray tracing.
Definition: ray_sse.h:34
Definition: ray_sse.h:74
SSEVector shapeIndex
Definition: ray_sse.h:79
QuadVector o
Definition: ray_sse.h:35
SSEVector primIndex
Definition: ray_sse.h:78
QuadVector d
Definition: ray_sse.h:35
uint32_t k
Definition: triaccel.h:38
Pre-computed triangle representation based on Ingo Wald's TriAccel layout.
Definition: triaccel.h:37
Float c_nu
Definition: triaccel.h:48
#define MTS_NAMESPACE_END
Definition: platform.h:138