From 705a0ab6770459d89474b3495b05214315a49c94 Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Tue, 25 Jun 2013 11:16:27 -0700 Subject: NORSPEC-288 WIP fixes the issue with normal gen, but may be a bigger perf hit than we want --- indra/llmath/llvector4a.inl | 20 +++++++++++ indra/llmath/llvolume.cpp | 83 ++++++++++++++++++++++++--------------------- 2 files changed, 64 insertions(+), 39 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7c52ffef21..558fe09323 100755 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -409,6 +409,26 @@ inline void LLVector4a::normalize3fast() mQ = _mm_mul_ps( mQ, approxRsqrt ); } +inline void LLVector4a::normalize3fast_checked(LLVector4a* d) +{ + if (!isFinite3()) + { + *this = d ? *d : LLVector4a(0,1,0,1); + return; + } + + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); + + if (lenSqrd.getF32ptr()[0] <= FLT_EPSILON) + { + *this = d ? *d : LLVector4a(0,1,0,1); + return; + } + + const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + mQ = _mm_mul_ps( mQ, approxRsqrt ); +} + // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const { diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 14cebfe5aa..58cac57e7f 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4472,6 +4472,9 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, continue; //skip degenerate face } + LLVector4a default_norm; + default_norm.set(0,1,0,1); + //for each edge for (S32 k = 0; k < 3; k++) { S32 index = face.mEdge[j*3+k]; @@ -4493,14 +4496,14 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, norm_mat.rotate(n[v1], t); - t.normalize3fast(); + t.normalize3fast_checked(&default_norm); normals.push_back(LLVector3(t[0], t[1], t[2])); mat.affineTransform(v[v2], t); vertices.push_back(LLVector3(t[0], t[1], t[2])); norm_mat.rotate(n[v2], t); - t.normalize3fast(); + t.normalize3fast_checked(&default_norm); normals.push_back(LLVector3(t[0], t[1], t[2])); } } @@ -6096,6 +6099,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { VertexData corners[4]; VertexData baseVert; + LLVector4a default_norm; + default_norm.set(0,1,0,1); + for(S32 t = 0; t < 4; t++) { corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); @@ -6108,8 +6114,8 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) lhs.setSub(corners[1].getPosition(), corners[0].getPosition()); LLVector4a rhs; rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); - baseVert.getNormal().setCross3(lhs, rhs); - baseVert.getNormal().normalize3fast(); + baseVert.getNormal().setCross3(lhs, rhs); + baseVert.getNormal().normalize3fast_checked(&default_norm); } if(!(mTypeMask & TOP_MASK)) @@ -6559,17 +6565,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); LLVector4a normal; - normal.setCross3(d0,d1); - - if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO) - { - normal.normalize3fast(); - } - else - { //degenerate, make up a value - normal.set(0,0,1); - } + LLVector4a default_norm; + default_norm.set(0,1,0,1); + normal.setCross3(d0,d1); + normal.normalize3fast_checked(&default_norm); + llassert(llfinite(normal.getF32ptr()[0])); llassert(llfinite(normal.getF32ptr()[1])); llassert(llfinite(normal.getF32ptr()[2])); @@ -6611,11 +6612,13 @@ void LLVolumeFace::createTangents() CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); //normalize tangents + LLVector4a default_norm; + default_norm.set(0,1,0,1); for (U32 i = 0; i < mNumVertices; i++) { //binorm[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized - mNormals[i].normalize3fast(); + mNormals[i].normalize3fast_checked(&default_norm); } } } @@ -6793,6 +6796,9 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat mat.loadu(mat_in); norm_mat.loadu(norm_mat_in); + LLVector4a default_norm; + default_norm.set(0,1,0,1); + for (U32 i = 0; i < face.mNumVertices; ++i) { //transform appended face position and store @@ -6800,7 +6806,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat //transform appended face normal and store norm_mat.rotate(src_norm[i], dst_norm[i]); - dst_norm[i].normalize3fast(); + dst_norm[i].normalize3fast_checked(&default_norm); //copy appended face texture coordinate dst_tc[i] = src_tc[i]; @@ -7213,42 +7219,41 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) { - //LLVector4a *tan1 = new LLVector4a[vertexCount * 2]; LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a)); - LLVector4a* tan2 = tan1 + vertexCount; + LLVector4a* tan2 = tan1 + vertexCount; memset(tan1, 0, vertexCount*2*sizeof(LLVector4a)); - for (U32 a = 0; a < triangleCount; a++) - { - U32 i1 = *index_array++; - U32 i2 = *index_array++; - U32 i3 = *index_array++; + for (U32 a = 0; a < triangleCount; a++) + { + U32 i1 = *index_array++; + U32 i2 = *index_array++; + U32 i3 = *index_array++; - const LLVector4a& v1 = vertex[i1]; - const LLVector4a& v2 = vertex[i2]; - const LLVector4a& v3 = vertex[i3]; + const LLVector4a& v1 = vertex[i1]; + const LLVector4a& v2 = vertex[i2]; + const LLVector4a& v3 = vertex[i3]; - const LLVector2& w1 = texcoord[i1]; - const LLVector2& w2 = texcoord[i2]; - const LLVector2& w3 = texcoord[i3]; + const LLVector2& w1 = texcoord[i1]; + const LLVector2& w2 = texcoord[i2]; + const LLVector2& w3 = texcoord[i3]; const F32* v1ptr = v1.getF32ptr(); const F32* v2ptr = v2.getF32ptr(); const F32* v3ptr = v3.getF32ptr(); - float x1 = v2ptr[0] - v1ptr[0]; - float x2 = v3ptr[0] - v1ptr[0]; - float y1 = v2ptr[1] - v1ptr[1]; - float y2 = v3ptr[1] - v1ptr[1]; - float z1 = v2ptr[2] - v1ptr[2]; - float z2 = v3ptr[2] - v1ptr[2]; + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; - float s1 = w2.mV[0] - w1.mV[0]; - float s2 = w3.mV[0] - w1.mV[0]; - float t1 = w2.mV[1] - w1.mV[1]; - float t2 = w3.mV[1] - w1.mV[1]; + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; F32 rd = s1*t2-s2*t1; -- cgit v1.3 From c5fc5d9b46121bb0d44d3b35402d604514e2dd4e Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Sun, 28 Jul 2013 15:43:10 -0700 Subject: NORSPEC-323 fix norm/tangent gen for prim caps --- indra/llmath/llvolume.cpp | 253 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 202 insertions(+), 51 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 58cac57e7f..b90830ddd8 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -94,6 +94,16 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; +void calc_tangent_from_triangle( + LLVector4a& normal, + LLVector4a& tangent_out, + const LLVector4a& v1, + const LLVector2& w1, + const LLVector4a& v2, + const LLVector2& w2, + const LLVector4a& v3, + const LLVector2& w3); + BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -1594,7 +1604,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, S32 sides = (S32)llfloor(llfloor((MIN_DETAIL_FACES * detail + twist_mag * 3.5f * (detail-0.5f))) * params.getRevolutions()); if (is_sculpted) - sides = llmax(sculpt_size, 1); + sides = llmax(sculpt_size,1); genNGon(params, sides); } @@ -2062,7 +2072,7 @@ LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL ge generate(); - if (mParams.getSculptID().isNull() && mParams.getSculptType() == LL_SCULPT_TYPE_NONE || mParams.getSculptType() == LL_SCULPT_TYPE_MESH) + if (mParams.getSculptID().isNull() && ((mParams.getSculptType() == LL_SCULPT_TYPE_NONE) || (mParams.getSculptType() == LL_SCULPT_TYPE_MESH))) { createVolumeFaces(); } @@ -4472,9 +4482,6 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, continue; //skip degenerate face } - LLVector4a default_norm; - default_norm.set(0,1,0,1); - //for each edge for (S32 k = 0; k < 3; k++) { S32 index = face.mEdge[j*3+k]; @@ -4496,14 +4503,14 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, norm_mat.rotate(n[v1], t); - t.normalize3fast_checked(&default_norm); + t.normalize3fast(); normals.push_back(LLVector3(t[0], t[1], t[2])); mat.affineTransform(v[v2], t); vertices.push_back(LLVector3(t[0], t[1], t[2])); norm_mat.rotate(n[v2], t); - t.normalize3fast_checked(&default_norm); + t.normalize3fast(); normals.push_back(LLVector3(t[0], t[1], t[2])); } } @@ -5282,6 +5289,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) if (src.mTangents) { allocateTangents(src.mNumVertices); + llassert(mTangents); LLVector4a::memcpyNonAliased16((F32*) mTangents, (F32*) src.mTangents, vert_size); } else @@ -5293,6 +5301,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) if (src.mWeights) { allocateWeights(src.mNumVertices); + llassert(mWeights); LLVector4a::memcpyNonAliased16((F32*) mWeights, (F32*) src.mWeights, vert_size); } else @@ -5308,14 +5317,14 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size); } - + //delete return *this; } LLVolumeFace::~LLVolumeFace() { - ll_aligned_free_16(mExtents); + ll_aligned_free_16(mExtents); mExtents = NULL; freeData(); @@ -5325,7 +5334,7 @@ void LLVolumeFace::freeData() { ll_aligned_free_16(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + ll_aligned_free_16(mNormals); mNormals = NULL; ll_aligned_free_16(mTexCoords); mTexCoords = NULL; @@ -5911,10 +5920,10 @@ void LLVolumeFace::cacheOptimize() wght = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } - LLVector4a* binorm = NULL; + LLVector4a* tangent = NULL; if (mTangents) { - binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + tangent = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } //allocate mapping of old indices to new indices @@ -5939,7 +5948,7 @@ void LLVolumeFace::cacheOptimize() } if (mTangents) { - binorm[cur_idx] = mTangents[idx]; + tangent[cur_idx] = mTangents[idx]; } cur_idx++; @@ -5961,7 +5970,7 @@ void LLVolumeFace::cacheOptimize() mNormals = norm; mTexCoords = tc; mWeights = wght; - mTangents = binorm; + mTangents = tangent; //std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks); //llinfos << result << llendl; @@ -6099,9 +6108,6 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { VertexData corners[4]; VertexData baseVert; - LLVector4a default_norm; - default_norm.set(0,1,0,1); - for(S32 t = 0; t < 4; t++) { corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); @@ -6115,7 +6121,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) LLVector4a rhs; rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); baseVert.getNormal().setCross3(lhs, rhs); - baseVert.getNormal().normalize3fast_checked(&default_norm); + baseVert.getNormal().normalize3fast(); } if(!(mTypeMask & TOP_MASK)) @@ -6312,6 +6318,43 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) cuv = (min_uv + max_uv)*0.5f; + + LLVector4a tangent; + calc_tangent_from_triangle( + *norm, + tangent, + *mCenter, cuv, + pos[0], tc[0], + pos[1], tc[1]); + + if (tangent.getLength3() < 0.01) + { + tangent.set(1,0,0,1); + } + else + { + LLVector4a default_tangent; + default_tangent.set(1,0,0,1); + tangent.normalize3fast_checked(&default_tangent); + } + + LLVector4a normal; + LLVector4a d0, d1; + + d0.setSub(*mCenter, pos[0]); + d1.setSub(*mCenter, pos[1]); + + if (mTypeMask & TOP_MASK) + { + normal.setCross3(d0, d1); + } + else + { + normal.setCross3(d1, d0); + } + + normal.normalize3fast_checked(); + VertexData vd; vd.setPosition(*mCenter); vd.mTexCoord = cuv; @@ -6323,7 +6366,15 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) num_vertices++; } - + + allocateTangents(num_vertices); + + for (S32 i = 0; i < num_vertices; i++) + { + mTangents[i].load4a(tangent.getF32ptr()); + norm[i].load4a(normal.getF32ptr()); + } + if (partial_build) { return TRUE; @@ -6559,31 +6610,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } - LLVector4a d0,d1; - - d0.setSub(mPositions[mIndices[1]], mPositions[mIndices[0]]); - d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); - - LLVector4a normal; - LLVector4a default_norm; - default_norm.set(0,1,0,1); - - normal.setCross3(d0,d1); - normal.normalize3fast_checked(&default_norm); - - llassert(llfinite(normal.getF32ptr()[0])); - llassert(llfinite(normal.getF32ptr()[1])); - llassert(llfinite(normal.getF32ptr()[2])); - - llassert(!llisnan(normal.getF32ptr()[0])); - llassert(!llisnan(normal.getF32ptr()[1])); - llassert(!llisnan(normal.getF32ptr()[2])); - - for (S32 i = 0; i < num_vertices; i++) - { - norm[i].load4a(normal.getF32ptr()); - } - return TRUE; } @@ -6616,9 +6642,9 @@ void LLVolumeFace::createTangents() default_norm.set(0,1,0,1); for (U32 i = 0; i < mNumVertices; i++) { - //binorm[i].normalize3fast(); + //tangent[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized - mNormals[i].normalize3fast_checked(&default_norm); + mNormals[i].normalize3fast_checked(); } } } @@ -6693,7 +6719,7 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con void LLVolumeFace::allocateTangents(S32 num_verts) { - ll_aligned_free_16(mTangents); + ll_aligned_free_16(mTangents); mTangents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } @@ -6796,9 +6822,6 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat mat.loadu(mat_in); norm_mat.loadu(norm_mat_in); - LLVector4a default_norm; - default_norm.set(0,1,0,1); - for (U32 i = 0; i < face.mNumVertices; ++i) { //transform appended face position and store @@ -6806,7 +6829,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat //transform appended face normal and store norm_mat.rotate(src_norm[i], dst_norm[i]); - dst_norm[i].normalize3fast_checked(&default_norm); + dst_norm[i].normalize3fast_checked(); //copy appended face texture coordinate dst_tc[i] = src_tc[i]; @@ -7215,6 +7238,134 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) return TRUE; } +// Finds binormal based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. +void calc_binormal_from_triangle(LLVector4a& binormal, + + const LLVector4a& pos0, + const LLVector2& tex0, + const LLVector4a& pos1, + const LLVector2& tex1, + const LLVector4a& pos2, + const LLVector2& tex2) +{ + LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a lhs, rhs; + + LLVector4a r0; + lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2); + r0.setCross3(lhs, rhs); + + LLVector4a r1; + lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2); + r1.setCross3(lhs, rhs); + + LLVector4a r2; + lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2); + r2.setCross3(lhs, rhs); + + if( r0[VX] && r1[VX] && r2[VX] ) + { + binormal.set( + -r0[VZ] / r0[VX], + -r1[VZ] / r1[VX], + -r2[VZ] / r2[VX]); + // binormal.normVec(); + } + else + { + binormal.set( 0, 1 , 0 ); + } +} + +// Finds binormal based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. +void calc_tangent_from_triangle( + LLVector4a& normal, + LLVector4a& tangent_out, + const LLVector4a& v1, + const LLVector2& w1, + const LLVector4a& v2, + const LLVector2& w2, + const LLVector4a& v3, + const LLVector2& w3) +{ + const F32* v1ptr = v1.getF32ptr(); + const F32* v2ptr = v2.getF32ptr(); + const F32* v3ptr = v3.getF32ptr(); + + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; + + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; + + F32 rd = s1*t2-s2*t1; + + float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero + + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir( + (t2 * x1 - t1 * x2) * r, + (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + + LLVector4a tdir( + (s1 * x2 - s2 * x1) * r, + (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); + + LLVector4a n = normal; + LLVector4a t = sdir; + + LLVector4a ncrosst; + ncrosst.setCross3(n,t); + + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); + + LLVector4a tsubn; + tsubn.setSub(t,n); + + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast_checked(); + + // Calculate handedness + F32 handedness = ncrosst.dot3(tdir).getF32() < 0.f ? -1.f : 1.f; + + tsubn.getF32ptr()[3] = handedness; + + tangent_out = tsubn; + } + else + { + // degenerate, make up a value + // + tangent_out.set(0,0,1,1); + } + +} + //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) -- cgit v1.3 From 62011c2871bdb09b92c56d2959eed64ba6ec3e1f Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Mon, 19 Aug 2013 12:56:04 -0700 Subject: Fix merge issues from genBBoxes optimizations fighting with genBBoxes optimizations and missing statics --- indra/llmath/llvolume.cpp | 157 +++++++++++++++++++++---------------------- indra/newview/llface.cpp | 98 ++++++++++----------------- indra/newview/llface.h | 3 +- indra/newview/llvovolume.cpp | 2 +- indra/newview/pipeline.cpp | 2 + indra/newview/pipeline.h | 4 +- 6 files changed, 119 insertions(+), 147 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index ad0ca618a0..a030d889af 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -136,6 +136,82 @@ BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* cent return true; } +// Finds tangent vec based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. +void calc_tangent_from_triangle( + LLVector4a& normal, + LLVector4a& tangent_out, + const LLVector4a& v1, + const LLVector2& w1, + const LLVector4a& v2, + const LLVector2& w2, + const LLVector4a& v3, + const LLVector2& w3) +{ + const F32* v1ptr = v1.getF32ptr(); + const F32* v2ptr = v2.getF32ptr(); + const F32* v3ptr = v3.getF32ptr(); + + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; + + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; + + F32 rd = s1*t2-s2*t1; + + float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero + + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir( + (t2 * x1 - t1 * x2) * r, + (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + + LLVector4a tdir( + (s1 * x2 - s2 * x1) * r, + (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); + + LLVector4a n = normal; + LLVector4a t = sdir; + + LLVector4a ncrosst; + ncrosst.setCross3(n,t); + + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); + + LLVector4a tsubn; + tsubn.setSub(t,n); + + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast_checked(); + + // Calculate handedness + F32 handedness = ncrosst.dot3(tdir).getF32() < 0.f ? -1.f : 1.f; + + tsubn.getF32ptr()[3] = handedness; + + tangent_out = tsubn; + } + else + { + // degenerate, make up a value + // + tangent_out.set(0,0,1,1); + } + +} // intersect test between triangle vert0, vert1, vert2 and a ray from orig in direction dir. @@ -5908,10 +5984,10 @@ void LLVolumeFace::cacheOptimize() wght = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } - LLVector4a* binorm = NULL; + LLVector4a* tangent = NULL; if (mTangents) { - binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + tangent = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } //allocate mapping of old indices to new indices @@ -7277,83 +7353,6 @@ void calc_binormal_from_triangle(LLVector4a& binormal, } } -// Finds binormal based on three vertices with texture coordinates. -// Fills in dummy values if the triangle has degenerate texture coordinates. -void calc_tangent_from_triangle( - LLVector4a& normal, - LLVector4a& tangent_out, - const LLVector4a& v1, - const LLVector2& w1, - const LLVector4a& v2, - const LLVector2& w2, - const LLVector4a& v3, - const LLVector2& w3) -{ - const F32* v1ptr = v1.getF32ptr(); - const F32* v2ptr = v2.getF32ptr(); - const F32* v3ptr = v3.getF32ptr(); - - float x1 = v2ptr[0] - v1ptr[0]; - float x2 = v3ptr[0] - v1ptr[0]; - float y1 = v2ptr[1] - v1ptr[1]; - float y2 = v3ptr[1] - v1ptr[1]; - float z1 = v2ptr[2] - v1ptr[2]; - float z2 = v3ptr[2] - v1ptr[2]; - - float s1 = w2.mV[0] - w1.mV[0]; - float s2 = w3.mV[0] - w1.mV[0]; - float t1 = w2.mV[1] - w1.mV[1]; - float t2 = w3.mV[1] - w1.mV[1]; - - F32 rd = s1*t2-s2*t1; - - float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero - - llassert(llfinite(r)); - llassert(!llisnan(r)); - - LLVector4a sdir( - (t2 * x1 - t1 * x2) * r, - (t2 * y1 - t1 * y2) * r, - (t2 * z1 - t1 * z2) * r); - - LLVector4a tdir( - (s1 * x2 - s2 * x1) * r, - (s1 * y2 - s2 * y1) * r, - (s1 * z2 - s2 * z1) * r); - - LLVector4a n = normal; - LLVector4a t = sdir; - - LLVector4a ncrosst; - ncrosst.setCross3(n,t); - - // Gram-Schmidt orthogonalize - n.mul(n.dot3(t).getF32()); - - LLVector4a tsubn; - tsubn.setSub(t,n); - - if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) - { - tsubn.normalize3fast_checked(); - - // Calculate handedness - F32 handedness = ncrosst.dot3(tdir).getF32() < 0.f ? -1.f : 1.f; - - tsubn.getF32ptr()[3] = handedness; - - tangent_out = tsubn; - } - else - { - // degenerate, make up a value - // - tangent_out.set(0,0,1,1); - } - -} - //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 700b31f8d3..53e5b55b89 100755 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -768,7 +768,7 @@ bool less_than_max_mag(const LLVector4a& vec) } BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat_vert_in, const LLMatrix3& mat_normal_in, BOOL global_volume) + const LLMatrix4& mat_vert_in, BOOL global_volume) { //get bounding box if (mDrawablep->isState(LLDrawable::REBUILD_VOLUME | LLDrawable::REBUILD_POSITION | LLDrawable::REBUILD_RIGGED)) @@ -777,10 +777,6 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, LLMatrix4a mat_vert; mat_vert.loadu(mat_vert_in); - LLMatrix4a mat_normal; - mat_normal.loadu(mat_normal_in); - - //VECTORIZE THIS LLVector4a min,max; if (f >= volume.getNumVolumeFaces()) @@ -797,95 +793,69 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, llassert(less_than_max_mag(max)); //min, max are in volume space, convert to drawable render space - LLVector4a center; - LLVector4a t; - t.setAdd(min, max); - t.mul(0.5f); - mat_vert.affineTransform(t, center); - LLVector4a size; - size.setSub(max, min); - size.mul(0.5f); - llassert(less_than_max_mag(min)); - llassert(less_than_max_mag(max)); + //get 8 corners of bounding box + LLVector4Logical mask[6]; - if (!global_volume) + for (U32 i = 0; i < 6; ++i) { - //VECTORIZE THIS - LLVector4a scale; - scale.load3(mDrawablep->getVObj()->getScale().mV); - size.mul(scale); + mask[i].clear(); } - mat_normal.mMatrix[0].normalize3fast(); - mat_normal.mMatrix[1].normalize3fast(); - mat_normal.mMatrix[2].normalize3fast(); + mask[0].setElement<2>(); //001 + mask[1].setElement<1>(); //010 + mask[2].setElement<1>(); //011 + mask[2].setElement<2>(); + mask[3].setElement<0>(); //100 + mask[4].setElement<0>(); //101 + mask[4].setElement<2>(); + mask[5].setElement<0>(); //110 + mask[5].setElement<1>(); - LLVector4a v[4]; + LLVector4a v[8]; - //get 4 corners of bounding box - mat_normal.rotate(size,v[0]); + v[6] = min; + v[7] = max; - //VECTORIZE THIS - LLVector4a scale; - - scale.set(-1.f, -1.f, 1.f); - scale.mul(size); - mat_normal.rotate(scale, v[1]); + for (U32 i = 0; i < 6; ++i) + { + v[i].setSelectWithMask(mask[i], min, max); + } - scale.set(1.f, -1.f, -1.f); - scale.mul(size); - mat_normal.rotate(scale, v[2]); + LLVector4a tv[8]; - scale.set(-1.f, 1.f, -1.f); - scale.mul(size); - mat_normal.rotate(scale, v[3]); + //transform bounding box into drawable space + for (U32 i = 0; i < 8; ++i) + { + mat_vert.affineTransform(v[i], tv[i]); + } + //find bounding box LLVector4a& newMin = mExtents[0]; LLVector4a& newMax = mExtents[1]; - newMin = newMax = center; - - llassert(less_than_max_mag(center)); + newMin = newMax = tv[0]; - for (U32 i = 0; i < 4; i++) + for (U32 i = 1; i < 8; ++i) { - LLVector4a delta; - delta.setAbs(v[i]); - LLVector4a min; - min.setSub(center, delta); - LLVector4a max; - max.setAdd(center, delta); - - newMin.setMin(newMin,min); - newMax.setMax(newMax,max); - - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); + newMin.setMin(newMin, tv[i]); + newMax.setMax(newMax, tv[i]); } if (!mDrawablep->isActive()) - { + { // Shift position for region LLVector4a offset; offset.load3(mDrawablep->getRegion()->getOriginAgent().mV); newMin.add(offset); newMax.add(offset); - - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); } + LLVector4a t; t.setAdd(newMin, newMax); t.mul(0.5f); - llassert(less_than_max_mag(t)); - - //VECTORIZE THIS mCenterLocal.set(t.getF32ptr()); - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); - t.setSub(newMax,newMin); mBoundingSphereRadius = t.getLength3().getF32()*0.5f; diff --git a/indra/newview/llface.h b/indra/newview/llface.h index 763634a3ab..66b5f13740 100755 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -194,8 +194,7 @@ public: void setSize(S32 numVertices, S32 num_indices = 0, bool align = false); - BOOL genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat, BOOL global_volume = FALSE); + BOOL genVolumeBBoxes(const LLVolume &volume, S32 f,const LLMatrix4& mat, BOOL global_volume = FALSE); void init(LLDrawable* drawablep, LLViewerObject* objp); void destroy(); diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 113bdd2ce3..33b26494de 100755 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -1458,7 +1458,7 @@ BOOL LLVOVolume::genBBoxes(BOOL force_global) continue; } res &= face->genVolumeBBoxes(*volume, i, - mRelativeXform, mRelativeXformInvTrans, + mRelativeXform, /*mRelativeXformInvTrans,*/ (mVolumeImpl && mVolumeImpl->isVolumeGlobal()) || force_global); if (rebuild) diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index c4286e73eb..8be7b2ab53 100755 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -169,6 +169,7 @@ S32 LLPipeline::RenderGlowIterations; F32 LLPipeline::RenderGlowWidth; F32 LLPipeline::RenderGlowStrength; BOOL LLPipeline::RenderDepthOfField; +BOOL LLPipeline::RenderDepthOfFieldInEditMode; F32 LLPipeline::CameraFocusTransitionTime; F32 LLPipeline::CameraFNumber; F32 LLPipeline::CameraFocalLength; @@ -379,6 +380,7 @@ BOOL LLPipeline::sWaterReflections = FALSE; BOOL LLPipeline::sRenderGlow = FALSE; BOOL LLPipeline::sReflectionRender = FALSE; BOOL LLPipeline::sImpostorRender = FALSE; +BOOL LLPipeline::sImpostorRenderAlphaDepthPass = FALSE; BOOL LLPipeline::sUnderWaterRender = FALSE; BOOL LLPipeline::sTextureBindTest = FALSE; BOOL LLPipeline::sRenderFrameTest = FALSE; diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index f0bebbe20d..fbbcf8f06d 100755 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h @@ -295,7 +295,8 @@ public: void unbindDeferredShader(LLGLSLShader& shader); void renderDeferredLighting(); - + void renderDeferredLightingToRT(LLRenderTarget* target); + void generateWaterReflection(LLCamera& camera); void generateSunShadow(LLCamera& camera); void generateHighlight(LLCamera& camera); @@ -594,6 +595,7 @@ public: static BOOL sPickAvatar; static BOOL sReflectionRender; static BOOL sImpostorRender; + static BOOL sImpostorRenderAlphaDepthPass; static BOOL sUnderWaterRender; static BOOL sRenderGlow; static BOOL sTextureBindTest; -- cgit v1.3