From f5e5396c3a17b6bcdc4eb49cda304a9047920fe1 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 26 Feb 2013 15:15:08 -0600 Subject: MAINT-2371 First set of profile guided optimizations. Reviewed by Graham --- indra/llmath/llvolume.cpp | 97 ++++++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 44 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 02c8d2b86f..77d89568df 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5187,7 +5187,8 @@ LLVolumeFace::LLVolumeFace() : mTexCoords(NULL), mIndices(NULL), mWeights(NULL), - mOctree(NULL) + mOctree(NULL), + mOptimized(FALSE) { mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); mExtents[0].splat(-0.5f); @@ -5517,14 +5518,14 @@ class LLVCacheVertexData public: S32 mIdx; S32 mCacheTag; - F32 mScore; + F64 mScore; U32 mActiveTriangles; std::vector mTriangles; LLVCacheVertexData() { mCacheTag = -1; - mScore = 0.f; + mScore = 0.0; mActiveTriangles = 0; mIdx = -1; } @@ -5534,13 +5535,13 @@ class LLVCacheTriangleData { public: bool mActive; - F32 mScore; + F64 mScore; LLVCacheVertexData* mVertex[3]; LLVCacheTriangleData() { mActive = true; - mScore = 0.f; + mScore = 0.0; mVertex[0] = mVertex[1] = mVertex[2] = NULL; } @@ -5551,7 +5552,7 @@ public: { if (mVertex[i]) { - llassert_always(mVertex[i]->mActiveTriangles > 0); + llassert(mVertex[i]->mActiveTriangles > 0); mVertex[i]->mActiveTriangles--; } } @@ -5563,44 +5564,44 @@ public: } }; -const F32 FindVertexScore_CacheDecayPower = 1.5f; -const F32 FindVertexScore_LastTriScore = 0.75f; -const F32 FindVertexScore_ValenceBoostScale = 2.0f; -const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const F64 FindVertexScore_CacheDecayPower = 1.5; +const F64 FindVertexScore_LastTriScore = 0.75; +const F64 FindVertexScore_ValenceBoostScale = 2.0; +const F64 FindVertexScore_ValenceBoostPower = 0.5; const U32 MaxSizeVertexCache = 32; +const F64 FindVertexScore_Scaler = 1.0/(MaxSizeVertexCache-3); -F32 find_vertex_score(LLVCacheVertexData& data) +F64 find_vertex_score(LLVCacheVertexData& data) { - if (data.mActiveTriangles == 0) - { //no triangle references this vertex - return -1.f; - } - - F32 score = 0.f; + F64 score = -1.0; - S32 cache_idx = data.mCacheTag; + if (data.mActiveTriangles >= 0) + { + score = 0.0; + + S32 cache_idx = data.mCacheTag; - if (cache_idx < 0) - { - //not in cache - } - else - { - if (cache_idx < 3) - { //vertex was in the last triangle - score = FindVertexScore_LastTriScore; + if (cache_idx < 0) + { + //not in cache } else - { //more points for being higher in the cache - F32 scaler = 1.f/(MaxSizeVertexCache-3); - score = 1.f-((cache_idx-3)*scaler); - score = powf(score, FindVertexScore_CacheDecayPower); + { + if (cache_idx < 3) + { //vertex was in the last triangle + score = FindVertexScore_LastTriScore; + } + else + { //more points for being higher in the cache + score = 1.0-((cache_idx-3)*FindVertexScore_Scaler); + score = pow(score, FindVertexScore_CacheDecayPower); + } } - } - //bonus points for having low valence - F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); - score += FindVertexScore_ValenceBoostScale * valence_boost; + //bonus points for having low valence + F64 valence_boost = pow((F64)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + score += FindVertexScore_ValenceBoostScale * valence_boost; + } return score; } @@ -5720,7 +5721,7 @@ public: if (mCache[i]) { mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert_always(mCache[i]->mCacheTag == i); + llassert(mCache[i]->mCacheTag == i); } } @@ -5728,11 +5729,14 @@ public: //update triangle scores for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) { - if (mCache[i]) + LLVCacheVertexData* data = mCache[i]; + if (data) { - for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) + U32 count = data->mTriangles.size(); + + for (U32 j = 0; j < count; ++j) { - LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; + LLVCacheTriangleData* tri = data->mTriangles[j]; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5753,7 +5757,7 @@ public: { if (mCache[i]) { - llassert_always(mCache[i]->mCacheTag == -1); + llassert(mCache[i]->mCacheTag == -1); mCache[i] = NULL; } } @@ -5765,6 +5769,9 @@ void LLVolumeFace::cacheOptimize() { //optimize for vertex cache according to Forsyth method: // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html + llassert(!mOptimized); + mOptimized = TRUE; + LLVCacheLRU cache; if (mNumVertices < 3) @@ -5810,12 +5817,14 @@ void LLVolumeFace::cacheOptimize() for (U32 i = 0; i < mNumVertices; i++) { //initialize score values (no cache -- might try a fifo cache here) - vertex_data[i].mScore = find_vertex_score(vertex_data[i]); - vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + LLVCacheVertexData& data = vertex_data[i]; + + data.mScore = find_vertex_score(data); + data.mActiveTriangles = data.mTriangles.size(); - for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) + for (U32 j = 0; j < data.mActiveTriangles; ++j) { - vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; + data.mTriangles[j]->mScore += data.mScore; } } -- cgit v1.3 From 609ed855e1160505238378a1be49e2b92e8496f5 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Mon, 4 Mar 2013 18:01:42 -0600 Subject: MAINT-2371 More optimizations. Reviewed by Graham --- indra/llcommon/llmemory.h | 13 +- indra/llmath/llvolume.cpp | 220 ++++++++++++--------- indra/llmath/llvolume.h | 1 + indra/llrender/llgl.cpp | 11 +- indra/llrender/llimagegl.cpp | 34 +++- indra/llrender/llrender.cpp | 10 + indra/llrender/llshadermgr.cpp | 46 ++++- indra/llrender/llshadermgr.h | 44 +++++ indra/llrender/llvertexbuffer.cpp | 88 ++++----- indra/llrender/llvertexbuffer.h | 4 +- .../app_settings/shaders/class1/deferred/giF.glsl | 190 ------------------ .../shaders/class1/deferred/waterF.glsl | 6 +- .../shaders/class1/environment/underWaterF.glsl | 2 - .../shaders/class1/environment/waterF.glsl | 2 - indra/newview/llappviewer.cpp | 2 +- indra/newview/lldrawable.cpp | 2 +- indra/newview/lldrawpool.cpp | 1 + indra/newview/lldrawpoolavatar.cpp | 4 +- indra/newview/lldrawpoolterrain.cpp | 8 +- indra/newview/lldrawpoolwater.cpp | 62 +++--- indra/newview/llface.cpp | 99 +++++++--- indra/newview/llfasttimerview.cpp | 2 +- indra/newview/llviewerdisplay.cpp | 4 +- indra/newview/llviewershadermgr.cpp | 104 +++------- indra/newview/llviewershadermgr.h | 69 +------ indra/newview/llvovolume.cpp | 2 +- indra/newview/llwaterparammanager.cpp | 12 +- indra/newview/llwlparammanager.cpp | 4 +- indra/newview/pipeline.cpp | 10 +- 29 files changed, 474 insertions(+), 582 deletions(-) delete mode 100644 indra/newview/app_settings/shaders/class1/deferred/giF.glsl (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index e725bdd9fa..46cabfadcd 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -38,17 +38,28 @@ class LLMutex ; inline void* ll_aligned_malloc( size_t size, int align ) { +#if defined(LL_WINDOWS) + return _aligned_malloc(size, align); +#else void* mem = malloc( size + (align - 1) + sizeof(void*) ); char* aligned = ((char*)mem) + sizeof(void*); aligned += align - ((uintptr_t)aligned & (align - 1)); ((void**)aligned)[-1] = mem; return aligned; +#endif } inline void ll_aligned_free( void* ptr ) { - free( ((void**)ptr)[-1] ); +#if defined(LL_WINDOWS) + _aligned_free(ptr); +#else + if (ptr) + { + free( ((void**)ptr)[-1] ); + } +#endif } #if !LL_USE_TCMALLOC diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 77d89568df..d614695efb 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5180,6 +5180,7 @@ LLVolumeFace::LLVolumeFace() : mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5204,6 +5205,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5258,12 +5260,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) { LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); } - else - { - ll_aligned_free_16(mTexCoords) ; - mTexCoords = NULL ; - } - if (src.mBinormals) { @@ -5311,10 +5307,11 @@ void LLVolumeFace::freeData() { ll_aligned_free_16(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + + //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately mNormals = NULL; - ll_aligned_free_16(mTexCoords); mTexCoords = NULL; + ll_aligned_free_16(mIndices); mIndices = NULL; ll_aligned_free_16(mBinormals); @@ -5496,18 +5493,6 @@ void LLVolumeFace::optimize(F32 angle_cutoff) llassert(new_face.mNumIndices == mNumIndices); llassert(new_face.mNumVertices <= mNumVertices); - if (angle_cutoff > 1.f && !mNormals) - { - ll_aligned_free_16(new_face.mNormals); - new_face.mNormals = NULL; - } - - if (!mTexCoords) - { - ll_aligned_free_16(new_face.mTexCoords); - new_face.mTexCoords = NULL; - } - swapData(new_face); } @@ -5708,35 +5693,44 @@ public: void updateScores() { - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) - { //trailing 3 vertices aren't actually in the cache for scoring purposes - if (mCache[i]) + LLVCacheVertexData** data_iter = mCache+MaxSizeVertexCache; + LLVCacheVertexData** end_data = mCache+MaxSizeVertexCache+3; + + while(data_iter != end_data) + { + LLVCacheVertexData* data = *data_iter++; + //trailing 3 vertices aren't actually in the cache for scoring purposes + if (data) { - mCache[i]->mCacheTag = -1; + data->mCacheTag = -1; } } - for (U32 i = 0; i < MaxSizeVertexCache; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache; + + while (data_iter != end_data) { //update scores of vertices in cache - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert(mCache[i]->mCacheTag == i); + data->mScore = find_vertex_score(*data); } } mBestTriangle = NULL; //update triangle scores - for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache+3; + + while (data_iter != end_data) { - LLVCacheVertexData* data = mCache[i]; + LLVCacheVertexData* data = *data_iter++; if (data) { - U32 count = data->mTriangles.size(); - - for (U32 j = 0; j < count; ++j) + for (std::vector::iterator iter = data->mTriangles.begin(), end_iter = data->mTriangles.end(); iter != end_iter; ++iter) { - LLVCacheTriangleData* tri = data->mTriangles[j]; + LLVCacheTriangleData* tri = *iter; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5753,13 +5747,17 @@ public: } //knock trailing 3 vertices off the cache - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache+MaxSizeVertexCache; + end_data = mCache+MaxSizeVertexCache+3; + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter; + if (data) { - llassert(mCache[i]->mCacheTag == -1); - mCache[i] = NULL; + llassert(data->mCacheTag == -1); + *data_iter = NULL; } + ++data_iter; } } }; @@ -5894,10 +5892,10 @@ void LLVolumeFace::cacheOptimize() //allocate space for new buffer S32 num_verts = mNumVertices; - LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + LLVector4a* pos = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + LLVector4a* norm = pos + num_verts; + LLVector2* tc = (LLVector2*) (norm + num_verts); LLVector4a* wght = NULL; if (mWeights) @@ -5945,9 +5943,8 @@ void LLVolumeFace::cacheOptimize() mIndices[i] = new_idx[mIndices[i]]; } - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mWeights); ll_aligned_free_16(mBinormals); @@ -6664,24 +6661,22 @@ void LLVolumeFace::createBinormals() void LLVolumeFace::resizeVertices(S32 num_verts) { - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); + ll_aligned_free(mPositions); + //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mBinormals); - ll_aligned_free_16(mTexCoords); - + mBinormals = NULL; if (num_verts) { - mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mNormals, 16); - //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - ll_assert_aligned(mTexCoords, 16); + + mPositions = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + mNormals = mPositions+num_verts; + mTexCoords = (LLVector2*) (mNormals+num_verts); + + ll_assert_aligned(mPositions, 64); } else { @@ -6691,6 +6686,7 @@ void LLVolumeFace::resizeVertices(S32 num_verts) } mNumVertices = num_verts; + mNumAllocatedVertices = num_verts; } void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) @@ -6701,27 +6697,43 @@ void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) { S32 new_verts = mNumVertices+1; - S32 new_size = new_verts*16; - S32 old_size = mNumVertices*16; - //positions - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); - ll_assert_aligned(mPositions,16); + if (new_verts > mNumAllocatedVertices) + { + //double buffer size on expansion + new_verts *= 2; + + S32 new_tc_size = ((new_verts*8)+0xF) & ~0xF; + S32 old_tc_size = ((mNumVertices*8)+0xF) & ~0xF; + + S32 old_vsize = mNumVertices*16; + + S32 new_size = new_verts*16*2+new_tc_size; + + LLVector4a* old_buf = mPositions; + + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions+new_verts; + mTexCoords = (LLVector2*) (mNormals+new_verts); + + //positions + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + + //normals + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); - //normals - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); - ll_assert_aligned(mNormals,16); - - //tex coords - new_size = ((new_verts*8)+0xF) & ~0xF; - old_size = ((mNumVertices*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); - ll_assert_aligned(mTexCoords,16); + //tex coords + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tc_size); + //just clear binormals + ll_aligned_free_16(mBinormals); - //just clear binormals - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free(old_buf); + + mNumAllocatedVertices = new_verts; + + mBinormals = NULL; + } mPositions[mNumVertices] = pos; mNormals[mNumVertices] = norm; @@ -6810,13 +6822,23 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat llerrs << "Cannot append empty face." << llendl; } + U32 old_vsize = mNumVertices*16; + U32 new_vsize = new_count * 16; + U32 old_tcsize = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_tcsize = (new_count*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_size = new_vsize * 2 + new_tcsize; + //allocate new buffer space - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); - ll_assert_aligned(mTexCoords, 16); + LLVector4a* old_buf = mPositions; + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions + new_count; + mTexCoords = (LLVector2*) (mNormals+new_count); + + mNumAllocatedVertices = new_count; + + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tcsize); mNumVertices = new_count; @@ -6912,12 +6934,15 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - S32 begin_stex = llfloor( profile[mBeginS].mV[2] ); + F32 begin_stex = floorf(profile[mBeginS].mV[2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; + S32 end_t = mBeginT+mNumT; + bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2; + // Copy the vertices into the array - for (t = mBeginT; t < mBeginT + mNumT; t++) + for (t = mBeginT; t < end_t; t++) { tt = path_data[t].mTexT; for (s = 0; s < num_s; s++) @@ -6968,9 +6993,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) norm[cur_vertex].clear(); cur_vertex++; - if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) + if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); tc[cur_vertex] = LLVector2(ss,tt); @@ -7085,30 +7109,38 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } //generate normals - for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle + U32 count = mNumIndices/3; + + for (U32 i = 0; i < count; i++) //for each triangle { const U16* idx = &(mIndices[i*3]); - - LLVector4a* v[] = - { pos+idx[0], pos+idx[1], pos+idx[2] }; + LLVector4a& v0 = *(pos+idx[0]); + LLVector4a& v1 = *(pos+idx[1]); + LLVector4a& v2 = *(pos+idx[2]); - LLVector4a* n[] = - { norm+idx[0], norm+idx[1], norm+idx[2] }; + LLVector4a& n0 = *(norm+idx[0]); + LLVector4a& n1 = *(norm+idx[1]); + LLVector4a& n2 = *(norm+idx[2]); //calculate triangle normal LLVector4a a, b, c; - a.setSub(*v[0], *v[1]); - b.setSub(*v[0], *v[2]); + a.setSub(v0, v1); + b.setSub(v0, v2); c.setCross3(a,b); - n[0]->add(c); - n[1]->add(c); - n[2]->add(c); + n0.add(c); + n1.add(c); + n2.add(c); //even out quad contributions - n[i%2+1]->add(c); + switch (i%2+1) + { + case 0: n0.add(c); break; + case 1: n1.add(c); break; + case 2: n2.add(c); break; + }; } // adjust normals based on wrapping and stitching diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 99158c1c44..1d3b0fe52f 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -912,6 +912,7 @@ public: LLVector2 mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face. S32 mNumVertices; + S32 mNumAllocatedVertices; S32 mNumIndices; LLVector4a* mPositions; diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 89f1f36297..58bd346c15 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -741,7 +741,7 @@ bool LLGLManager::initGL() #if LL_WINDOWS if (mHasDebugOutput && gDebugGL) { //setup debug output callback - //glDebugMessageControlARB(GL_DONT_CARE, GL_DONT_CARE, GL_DEBUG_SEVERITY_LOW_ARB, 0, NULL, GL_TRUE); + glDebugMessageControlARB(GL_DONT_CARE, GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB, GL_DEBUG_SEVERITY_LOW_ARB, 0, NULL, GL_TRUE); glDebugMessageCallbackARB((GLDEBUGPROCARB) gl_debug_callback, NULL); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB); } @@ -1478,7 +1478,7 @@ void do_assert_glerror() void assert_glerror() { - if (!gGLActive) +/* if (!gGLActive) { //llwarns << "GL used while not active!" << llendl; @@ -1487,8 +1487,13 @@ void assert_glerror() //ll_fail("GL used while not active"); } } +*/ - if (gDebugGL) + if (!gDebugGL) + { + //funny looking if for branch prediction -- gDebugGL is almost always false and assert_glerror is called often + } + else { do_assert_glerror(); } diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp index a4d7872ec2..ef2648ae98 100755 --- a/indra/llrender/llimagegl.cpp +++ b/indra/llrender/llimagegl.cpp @@ -709,9 +709,12 @@ void LLImageGL::setImage(const U8* data_in, BOOL data_hasmips) mMipLevels = wpo2(llmax(w, h)); - //use legacy mipmap generation mode - glTexParameteri(mTarget, GL_GENERATE_MIPMAP, GL_TRUE); - + if (!gGLManager.mHasFramebufferObject) + { + //use legacy mipmap generation mode + glTexParameteri(mTarget, GL_GENERATE_MIPMAP, GL_TRUE); + } + LLImageGL::setManualImage(mTarget, 0, mFormatInternal, w, h, mFormatPrimary, mFormatType, @@ -726,6 +729,11 @@ void LLImageGL::setImage(const U8* data_in, BOOL data_hasmips) glPixelStorei(GL_UNPACK_SWAP_BYTES, 0); stop_glerror(); } + + if (gGLManager.mHasFramebufferObject) + { + glGenerateMipmap(mTarget); + } } } else @@ -1057,6 +1065,16 @@ void LLImageGL::generateTextures(LLTexUnit::eTextureType type, U32 format, S32 n { bool empty = true; + if (LLRender::sGLCoreProfile) + { + switch (format) + { + case GL_LUMINANCE8: format = GL_RGB8; break; + case GL_LUMINANCE8_ALPHA8: + case GL_ALPHA8: format = GL_RGBA8; break; + } + } + dead_texturelist_t::iterator iter = sDeadTextureList[type].find(format); if (iter != sDeadTextureList[type].end()) @@ -1084,6 +1102,16 @@ void LLImageGL::deleteTextures(LLTexUnit::eTextureType type, U32 format, S32 mip { if (gGLManager.mInited) { + if (LLRender::sGLCoreProfile) + { + switch (format) + { + case GL_LUMINANCE8: format = GL_RGB8; break; + case GL_LUMINANCE8_ALPHA8: + case GL_ALPHA8: format = GL_RGBA8; break; + } + } + if (format == 0 || type == LLTexUnit::TT_CUBE_MAP || mip_levels == -1) { //unknown internal format or unknown number of mip levels, not safe to reuse glDeleteTextures(numTextures, textures); diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index cb82cbfb74..dada27557e 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -1070,6 +1070,16 @@ LLRender::~LLRender() void LLRender::init() { + if (sGLCoreProfile && !LLVertexBuffer::sUseVAO) + { //bind a dummy vertex array object so we're core profile compliant +#ifdef GL_ARB_vertex_array_object + U32 ret; + glGenVertexArrays(1, &ret); + glBindVertexArray(ret); +#endif + } + + llassert_always(mBuffer.isNull()) ; stop_glerror(); mBuffer = new LLVertexBuffer(immediate_mask, 0); diff --git a/indra/llrender/llshadermgr.cpp b/indra/llrender/llshadermgr.cpp index b6a9a6b653..825f80a6dc 100644 --- a/indra/llrender/llshadermgr.cpp +++ b/indra/llrender/llshadermgr.cpp @@ -974,7 +974,9 @@ void LLShaderMgr::initAttribsAndUniforms() mReservedUniforms.push_back("texture_matrix1"); mReservedUniforms.push_back("texture_matrix2"); mReservedUniforms.push_back("texture_matrix3"); - llassert(mReservedUniforms.size() == LLShaderMgr::TEXTURE_MATRIX3+1); + mReservedUniforms.push_back("object_plane_s"); + mReservedUniforms.push_back("object_plane_t"); + llassert(mReservedUniforms.size() == LLShaderMgr::OBJECT_PLANE_T+1); mReservedUniforms.push_back("viewport"); @@ -1116,6 +1118,48 @@ void LLShaderMgr::initAttribsAndUniforms() mReservedUniforms.push_back("bloomMap"); mReservedUniforms.push_back("projectionMap"); + mReservedUniforms.push_back("matrixPalette"); + + + mReservedUniforms.reserve(12); + mReservedUniforms.push_back("screenTex"); + mReservedUniforms.push_back("screenDepth"); + mReservedUniforms.push_back("refTex"); + mReservedUniforms.push_back("eyeVec"); + mReservedUniforms.push_back("time"); + mReservedUniforms.push_back("d1"); + mReservedUniforms.push_back("d2"); + mReservedUniforms.push_back("lightDir"); + mReservedUniforms.push_back("specular"); + mReservedUniforms.push_back("lightExp"); + mReservedUniforms.push_back("waterFogColor"); + mReservedUniforms.push_back("waterFogDensity"); + mReservedUniforms.push_back("waterFogKS"); + mReservedUniforms.push_back("refScale"); + mReservedUniforms.push_back("waterHeight"); + mReservedUniforms.push_back("waterPlane"); + mReservedUniforms.push_back("normScale"); + mReservedUniforms.push_back("fresnelScale"); + mReservedUniforms.push_back("fresnelOffset"); + mReservedUniforms.push_back("blurMultiplier"); + mReservedUniforms.push_back("sunAngle"); + mReservedUniforms.push_back("scaledAngle"); + mReservedUniforms.push_back("sunAngle2"); + + mReservedUniforms.push_back("camPosLocal"); + + mReservedUniforms.push_back("gWindDir"); + mReservedUniforms.push_back("gSinWaveParams"); + mReservedUniforms.push_back("gGravity"); + + mReservedUniforms.push_back("detail_0"); + mReservedUniforms.push_back("detail_1"); + mReservedUniforms.push_back("detail_2"); + mReservedUniforms.push_back("detail_3"); + mReservedUniforms.push_back("alpha_ramp"); + + mReservedUniforms.push_back("origin"); + llassert(mReservedUniforms.size() == END_RESERVED_UNIFORMS); std::set dupe_check; diff --git a/indra/llrender/llshadermgr.h b/indra/llrender/llshadermgr.h index 7a16b7c20f..77e90372e0 100644 --- a/indra/llrender/llshadermgr.h +++ b/indra/llrender/llshadermgr.h @@ -47,6 +47,8 @@ public: TEXTURE_MATRIX1, TEXTURE_MATRIX2, TEXTURE_MATRIX3, + OBJECT_PLANE_S, + OBJECT_PLANE_T, VIEWPORT, LIGHT_POSITION, LIGHT_DIRECTION, @@ -164,7 +166,49 @@ public: DEFERRED_LIGHT, DEFERRED_BLOOM, DEFERRED_PROJECTION, + + AVATAR_MATRIX, + + WATER_SCREENTEX, + WATER_SCREENDEPTH, + WATER_REFTEX, + WATER_EYEVEC, + WATER_TIME, + WATER_WAVE_DIR1, + WATER_WAVE_DIR2, + WATER_LIGHT_DIR, + WATER_SPECULAR, + WATER_SPECULAR_EXP, + WATER_FOGCOLOR, + WATER_FOGDENSITY, + WATER_FOGKS, + WATER_REFSCALE, + WATER_WATERHEIGHT, + WATER_WATERPLANE, + WATER_NORM_SCALE, + WATER_FRESNEL_SCALE, + WATER_FRESNEL_OFFSET, + WATER_BLUR_MULTIPLIER, + WATER_SUN_ANGLE, + WATER_SCALED_ANGLE, + WATER_SUN_ANGLE2, + + WL_CAMPOSLOCAL, + + AVATAR_WIND, + AVATAR_SINWAVE, + AVATAR_GRAVITY, + + TERRAIN_DETAIL0, + TERRAIN_DETAIL1, + TERRAIN_DETAIL2, + TERRAIN_DETAIL3, + TERRAIN_ALPHARAMP, + + SHINY_ORIGIN, + END_RESERVED_UNIFORMS + } eGLSLReservedUniforms; // singleton pattern implementation diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index f152911b24..1d257d8415 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -202,7 +202,7 @@ volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed) glBufferDataARB(mType, size, 0, mUsage); if (mUsage != GL_DYNAMIC_COPY_ARB) { //data will be provided by application - ret = (U8*) ll_aligned_malloc_16(size); + ret = (U8*) ll_aligned_malloc(size, 64); } } else @@ -256,7 +256,7 @@ void LLVBOPool::release(U32 name, volatile U8* buffer, U32 size) llassert(vbo_block_size(size) == size); deleteBuffer(name); - ll_aligned_free_16((U8*) buffer); + ll_aligned_free((U8*) buffer); if (mType == GL_ARRAY_BUFFER_ARB) { @@ -1298,7 +1298,7 @@ void LLVertexBuffer::allocateBuffer(S32 nverts, S32 nindices, bool create) //actually allocate space for the vertex buffer if using VBO mapping flush(); - if (gGLManager.mHasVertexArrayObject && useVBOs() && (LLRender::sGLCoreProfile || sUseVAO)) + if (gGLManager.mHasVertexArrayObject && useVBOs() && (sUseVAO)) { #if GL_ARB_vertex_array_object mGLArray = getVAOName(); @@ -1454,21 +1454,18 @@ bool LLVertexBuffer::useVBOs() const //---------------------------------------------------------------------------- -bool expand_region(LLVertexBuffer::MappedRegion& region, S32 index, S32 count) +bool expand_region(LLVertexBuffer::MappedRegion& region, S32 start, S32 end) { - S32 end = index+count; - S32 region_end = region.mIndex+region.mCount; - if (end < region.mIndex || - index > region_end) + start > region.mEnd) { //gap exists, do not merge return false; } - S32 new_end = llmax(end, region_end); - S32 new_index = llmin(index, region.mIndex); - region.mIndex = new_index; - region.mCount = new_end-new_index; + region.mEnd = llmax(end, region.mEnd); + region.mIndex = llmin(start, region.mIndex); + region.mCount = region.mEnd-region.mIndex; + return true; } @@ -1478,7 +1475,6 @@ static LLFastTimer::DeclareTimer FTM_VBO_MAP_BUFFER("VBO Map"); // Map for data access volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range) { - bindGLBuffer(true); if (mFinal) { llerrs << "LLVertexBuffer::mapVeretxBuffer() called on a finalized buffer." << llendl; @@ -1499,23 +1495,23 @@ volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, boo bool mapped = false; //see if range is already mapped - for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) + S32 start_index = mOffsets[type]+index*sTypeSize[type]; + S32 end_index = start_index+count*sTypeSize[type]; + + for (std::vector::iterator iter = mMappedVertexRegions.begin(), end = mMappedVertexRegions.end(); iter != end; ++iter) { - MappedRegion& region = mMappedVertexRegions[i]; - if (region.mType == type) + MappedRegion& region = *iter; + if (expand_region(region, index, end_index)) { - if (expand_region(region, index, count)) - { - mapped = true; - break; - } + mapped = true; + break; } } if (!mapped) { //not already mapped, map new region - MappedRegion region(type, mMappable && map_range ? -1 : index, count); + MappedRegion region(mMappable && map_range ? -1 : start_index, end_index-start_index); mMappedVertexRegions.push_back(region); } } @@ -1539,6 +1535,7 @@ volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, boo { volatile U8* src = NULL; waitFence(); + bindGLBuffer(); if (gGLManager.mHasMapBufferRange) { if (map_range) @@ -1657,7 +1654,6 @@ static LLFastTimer::DeclareTimer FTM_VBO_MAP_INDEX("IBO Map"); volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range) { - bindGLIndices(true); if (mFinal) { llerrs << "LLVertexBuffer::mapIndexBuffer() called on a finalized buffer." << llendl; @@ -1676,12 +1672,14 @@ volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range count = mNumIndices-index; } + S32 end = index+count; + bool mapped = false; //see if range is already mapped for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) { MappedRegion& region = mMappedIndexRegions[i]; - if (expand_region(region, index, count)) + if (expand_region(region, index, end)) { mapped = true; break; @@ -1691,7 +1689,7 @@ volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range if (!mapped) { //not already mapped, map new region - MappedRegion region(TYPE_INDEX, mMappable && map_range ? -1 : index, count); + MappedRegion region(mMappable && map_range ? -1 : index, count); mMappedIndexRegions.push_back(region); } } @@ -1707,23 +1705,23 @@ volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range sMappedCount++; stop_glerror(); - if (gDebugGL && useVBOs()) - { - GLint elem = 0; - glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING_ARB, &elem); - - if (elem != mGLIndices) - { - llerrs << "Wrong index buffer bound!" << llendl; - } - } - if(!mMappable) { map_range = false; } else { + bindGLIndices(); + if (gDebugGL && useVBOs()) + { + GLint elem = 0; + glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING_ARB, &elem); + + if (elem != mGLIndices) + { + llerrs << "Wrong index buffer bound!" << llendl; + } + } volatile U8* src = NULL; waitFence(); if (gGLManager.mHasMapBufferRange) @@ -1837,7 +1835,7 @@ void LLVertexBuffer::unmapBuffer() llassert(mUsage != GL_DYNAMIC_COPY_ARB); LLFastTimer t(FTM_VBO_UNMAP); - bindGLBuffer(true); + bindGLBuffer(); updated_all = mIndexLocked; //both vertex and index buffers done updating if(!mMappable) @@ -1848,8 +1846,8 @@ void LLVertexBuffer::unmapBuffer() for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) { const MappedRegion& region = mMappedVertexRegions[i]; - S32 offset = region.mIndex >= 0 ? mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0; - S32 length = sTypeSize[region.mType]*region.mCount; + S32 offset = region.mIndex; + S32 length = region.mCount; glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, offset, length, (U8*) mMappedData+offset); stop_glerror(); } @@ -1873,8 +1871,8 @@ void LLVertexBuffer::unmapBuffer() for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) { const MappedRegion& region = mMappedVertexRegions[i]; - S32 offset = region.mIndex >= 0 ? mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0; - S32 length = sTypeSize[region.mType]*region.mCount; + S32 offset = region.mIndex; + S32 length = region.mCount; if (gGLManager.mHasMapBufferRange) { LLFastTimer t(FTM_VBO_FLUSH_RANGE); @@ -2083,7 +2081,6 @@ bool LLVertexBuffer::bindGLArray() if (mGLArray && sGLRenderArray != mGLArray) { { - LLFastTimer t(FTM_BIND_GL_ARRAY); #if GL_ARB_vertex_array_object glBindVertexArray(mGLArray); #endif @@ -2453,11 +2450,10 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) llglassertok(); } -LLVertexBuffer::MappedRegion::MappedRegion(S32 type, S32 index, S32 count) -: mType(type), mIndex(index), mCount(count) +LLVertexBuffer::MappedRegion::MappedRegion(S32 index, S32 count) +: mIndex(index), mCount(count) { - llassert(mType == LLVertexBuffer::TYPE_INDEX || - mType < LLVertexBuffer::TYPE_TEXTURE_INDEX); + mEnd = mIndex+mCount; } diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index a3400ae80c..52559d3505 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -104,11 +104,11 @@ public: class MappedRegion { public: - S32 mType; S32 mIndex; S32 mCount; + S32 mEnd; - MappedRegion(S32 type, S32 index, S32 count); + MappedRegion(S32 index, S32 count); }; LLVertexBuffer(const LLVertexBuffer& rhs) diff --git a/indra/newview/app_settings/shaders/class1/deferred/giF.glsl b/indra/newview/app_settings/shaders/class1/deferred/giF.glsl deleted file mode 100644 index da1b234240..0000000000 --- a/indra/newview/app_settings/shaders/class1/deferred/giF.glsl +++ /dev/null @@ -1,190 +0,0 @@ -/** - * @file giF.glsl - * - * $LicenseInfo:firstyear=2007&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2007, Linden Research, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA - * $/LicenseInfo$ - */ - -#extension GL_ARB_texture_rectangle : enable - -#ifdef DEFINE_GL_FRAGCOLOR -out vec4 frag_color; -#else -#define frag_color gl_FragColor -#endif - -uniform sampler2DRect depthMap; -uniform sampler2DRect normalMap; -uniform sampler2D noiseMap; - -uniform sampler2D diffuseGIMap; -uniform sampler2D normalGIMap; -uniform sampler2D depthGIMap; - -uniform sampler2D lightFunc; - -// Inputs -VARYING vec2 vary_fragcoord; - -uniform vec2 screen_res; - -uniform mat4 inv_proj; -uniform mat4 gi_mat; //gPipeline.mGIMatrix - eye space to sun space -uniform mat4 gi_mat_proj; //gPipeline.mGIMatrixProj - eye space to projected sun space -uniform mat4 gi_norm_mat; //gPipeline.mGINormalMatrix - eye space normal to sun space normal matrix -uniform mat4 gi_inv_proj; //gPipeline.mGIInvProj - projected sun space to sun space -uniform float gi_radius; -uniform float gi_intensity; -uniform int gi_samples; -uniform vec2 gi_kern[25]; -uniform vec2 gi_scale; -uniform vec3 gi_quad; -uniform vec3 gi_spec; -uniform float gi_direction_weight; -uniform float gi_light_offset; - -vec4 getPosition(vec2 pos_screen) -{ - float depth = texture2DRect(depthMap, pos_screen.xy).a; - vec2 sc = pos_screen.xy*2.0; - sc /= screen_res; - sc -= vec2(1.0,1.0); - vec4 ndc = vec4(sc.x, sc.y, 2.0*depth-1.0, 1.0); - vec4 pos = inv_proj * ndc; - pos /= pos.w; - pos.w = 1.0; - return pos; -} - -vec4 getGIPosition(vec2 gi_tc) -{ - float depth = texture2D(depthGIMap, gi_tc).a; - vec2 sc = gi_tc*2.0; - sc -= vec2(1.0, 1.0); - vec4 ndc = vec4(sc.x, sc.y, 2.0*depth-1.0, 1.0); - vec4 pos = gi_inv_proj*ndc; - pos.xyz /= pos.w; - pos.w = 1.0; - return pos; -} - -vec3 giAmbient(vec3 pos, vec3 norm) -{ - vec4 gi_c = gi_mat_proj * vec4(pos, 1.0); - gi_c.xyz /= gi_c.w; - - vec4 gi_pos = gi_mat*vec4(pos,1.0); - vec3 gi_norm = (gi_norm_mat*vec4(norm,1.0)).xyz; - gi_norm = normalize(gi_norm); - - vec2 tcx = gi_norm.xy; - vec2 tcy = gi_norm.yx; - - vec4 eye_pos = gi_mat*vec4(0,0,0,1.0); - - vec3 eye_dir = normalize(gi_pos.xyz-eye_pos.xyz/eye_pos.w); - - //vec3 eye_dir = vec3(0,0,-1); - //eye_dir = (gi_norm_mat*vec4(eye_dir, 1.0)).xyz; - //eye_dir = normalize(eye_dir); - - //float round_x = gi_scale.x; - //float round_y = gi_scale.y; - - vec3 debug = texture2D(normalGIMap, gi_c.xy).rgb*0.5+0.5; - debug.xz = vec2(0.0,0.0); - //debug = fract(debug); - - float round_x = 1.0/64.0; - float round_y = 1.0/64.0; - - //gi_c.x = floor(gi_c.x/round_x+0.5)*round_x; - //gi_c.y = floor(gi_c.y/round_y+0.5)*round_y; - - float fda = 0.0; - vec3 fdiff = vec3(0,0,0); - - vec3 rcol = vec3(0,0,0); - - float fsa = 0.0; - - for (int i = -1; i < 2; i+=2 ) - { - for (int j = -1; j < 2; j+=2) - { - vec2 tc = vec2(i, j)*0.75; - vec3 nz = texture2D(noiseMap, vary_fragcoord.xy/128.0+tc*0.5).xyz; - //tc += gi_norm.xy*nz.z; - tc += nz.xy*2.0; - tc /= gi_samples; - tc += gi_c.xy; - - vec3 lnorm = -normalize(texture2D(normalGIMap, tc.xy).xyz*2.0-1.0); - vec3 lpos = getGIPosition(tc.xy).xyz; - - vec3 at = lpos-gi_pos.xyz; - float dist = dot(at,at); - float da = clamp(1.0/(gi_spec.x*dist), 0.0, 1.0); - - if (da > 0.0) - { - //add angular attenuation - vec3 ldir = at; - float ang_atten = clamp(dot(ldir, gi_norm), 0.0, 1.0); - - float ld = -dot(ldir, lnorm); - - if (ang_atten > 0.0 && ld < 0.0) - { - vec3 diff = texture2D(diffuseGIMap, tc.xy).xyz; - da = da*ang_atten; - fda += da; - fdiff += diff*da; - } - } - } - } - - fdiff /= max(gi_spec.y*fda, gi_quad.z); - fdiff = clamp(fdiff, vec3(0), vec3(1)); - - vec3 ret = fda*fdiff; - //ret = ret*ret*gi_quad.x+ret*gi_quad.y+gi_quad.z; - - //fda *= nz.z; - - //rcol.rgb *= gi_intensity; - //return rcol.rgb+vary_AmblitColor.rgb*0.25; - //return vec4(debug, 0.0); - //return vec4(fda*fdiff, 0.0); - return clamp(ret,vec3(0.0), vec3(1.0)); - //return debug.xyz; -} - -void main() -{ - vec2 pos_screen = vary_fragcoord.xy; - vec4 pos = getPosition(pos_screen); - vec3 norm = texture2DRect(normalMap, pos_screen).xyz; - norm = vec3((norm.xy-0.5)*2.0,norm.z); // unpack norm - - frag_color.xyz = giAmbient(pos, norm); -} diff --git a/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl b/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl index 3427d6db57..1149aec30b 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl @@ -53,13 +53,11 @@ uniform vec3 specular; uniform float lightExp; uniform float refScale; uniform float kd; -uniform vec2 screenRes; uniform vec3 normScale; uniform float fresnelScale; uniform float fresnelOffset; uniform float blurMultiplier; -uniform vec2 screen_res; -uniform mat4 norm_mat; //region space to screen space +uniform mat3 normal_matrix; //bigWave is (refCoord.w, view.w); VARYING vec4 refCoord; @@ -157,7 +155,7 @@ void main() //wavef.z *= 0.1f; //wavef = normalize(wavef); - vec3 screenspacewavef = (norm_mat*vec4(wavef, 1.0)).xyz; + vec3 screenspacewavef = normal_matrix*wavef; frag_data[0] = vec4(color.rgb, 0.5); // diffuse frag_data[1] = vec4(0.5,0.5,0.5, 0.95); // speccolor*spec, spec diff --git a/indra/newview/app_settings/shaders/class1/environment/underWaterF.glsl b/indra/newview/app_settings/shaders/class1/environment/underWaterF.glsl index 0d8dab0a41..485e48537c 100644 --- a/indra/newview/app_settings/shaders/class1/environment/underWaterF.glsl +++ b/indra/newview/app_settings/shaders/class1/environment/underWaterF.glsl @@ -43,13 +43,11 @@ uniform vec2 fbScale; uniform float refScale; uniform float znear; uniform float zfar; -uniform float kd; uniform vec4 waterPlane; uniform vec3 eyeVec; uniform vec4 waterFogColor; uniform float waterFogDensity; uniform float waterFogKS; -uniform vec2 screenRes; //bigWave is (refCoord.w, view.w); VARYING vec4 refCoord; diff --git a/indra/newview/app_settings/shaders/class1/environment/waterF.glsl b/indra/newview/app_settings/shaders/class1/environment/waterF.glsl index 79bffab745..1fd7bdaa5c 100644 --- a/indra/newview/app_settings/shaders/class1/environment/waterF.glsl +++ b/indra/newview/app_settings/shaders/class1/environment/waterF.glsl @@ -42,8 +42,6 @@ uniform vec3 lightDir; uniform vec3 specular; uniform float lightExp; uniform float refScale; -uniform float kd; -uniform vec2 screenRes; uniform vec3 normScale; uniform float fresnelScale; uniform float fresnelOffset; diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 7331b93810..9bbaede68d 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -537,7 +537,7 @@ static void settings_to_globals() LLSurface::setTextureSize(gSavedSettings.getU32("RegionTextureSize")); LLRender::sGLCoreProfile = gSavedSettings.getBOOL("RenderGLCoreProfile"); - + LLVertexBuffer::sUseVAO = gSavedSettings.getBOOL("RenderUseVAO"); LLImageGL::sGlobalUseAnisotropic = gSavedSettings.getBOOL("RenderAnisotropic"); LLImageGL::sCompressTextures = gSavedSettings.getBOOL("RenderCompressTextures"); LLVOVolume::sLODFactor = gSavedSettings.getF32("RenderVolumeLODFactor"); diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp index d041baea90..d046b22133 100644 --- a/indra/newview/lldrawable.cpp +++ b/indra/newview/lldrawable.cpp @@ -254,7 +254,7 @@ S32 LLDrawable::findReferences(LLDrawable *drawablep) return count; } -static LLFastTimer::DeclareTimer FTM_ALLOCATE_FACE("Allocate Face", true); +static LLFastTimer::DeclareTimer FTM_ALLOCATE_FACE("Allocate Face"); LLFace* LLDrawable::addFace(LLFacePool *poolp, LLViewerTexture *texturep) { diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp index 94dd927d26..d8f293cc62 100644 --- a/indra/newview/lldrawpool.cpp +++ b/indra/newview/lldrawpool.cpp @@ -472,6 +472,7 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba { params.mGroup->rebuildMesh(); } + params.mVertexBuffer->setBuffer(mask); params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset); gPipeline.addTrianglesDrawn(params.mCount, params.mDrawMode); diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp index 6d02ad2b96..c3cf744222 100644 --- a/indra/newview/lldrawpoolavatar.cpp +++ b/indra/newview/lldrawpoolavatar.cpp @@ -1505,7 +1505,7 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow) stop_glerror(); - LLDrawPoolAvatar::sVertexProgram->uniformMatrix4fv("matrixPalette", + LLDrawPoolAvatar::sVertexProgram->uniformMatrix4fv(LLViewerShaderMgr::AVATAR_MATRIX, skin->mJointNames.size(), FALSE, (GLfloat*) mat[0].mMatrix); @@ -1547,6 +1547,8 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow) buff->setBuffer(data_mask); buff->drawRange(LLRender::TRIANGLES, start, end, count, offset); } + + gPipeline.addTrianglesDrawn(count, LLRender::TRIANGLES); } } } diff --git a/indra/newview/lldrawpoolterrain.cpp b/indra/newview/lldrawpoolterrain.cpp index 9bc32fddbd..cac862a107 100644 --- a/indra/newview/lldrawpoolterrain.cpp +++ b/indra/newview/lldrawpoolterrain.cpp @@ -352,8 +352,8 @@ void LLDrawPoolTerrain::renderFullShader() LLGLSLShader* shader = LLGLSLShader::sCurBoundShaderPtr; llassert(shader); - shader->uniform4fv("object_plane_s", 1, tp0.mV); - shader->uniform4fv("object_plane_t", 1, tp1.mV); + shader->uniform4fv(LLShaderMgr::OBJECT_PLANE_S, 1, tp0.mV); + shader->uniform4fv(LLShaderMgr::OBJECT_PLANE_T, 1, tp1.mV); gGL.matrixMode(LLRender::MM_TEXTURE); gGL.loadIdentity(); @@ -862,8 +862,8 @@ void LLDrawPoolTerrain::renderSimple() if (LLGLSLShader::sNoFixedFunction) { - sShader->uniform4fv("object_plane_s", 1, tp0.mV); - sShader->uniform4fv("object_plane_t", 1, tp1.mV); + sShader->uniform4fv(LLShaderMgr::OBJECT_PLANE_S, 1, tp0.mV); + sShader->uniform4fv(LLShaderMgr::OBJECT_PLANE_T, 1, tp1.mV); } else { diff --git a/indra/newview/lldrawpoolwater.cpp b/indra/newview/lldrawpoolwater.cpp index 4f6eaa5a5b..b6a4b0194c 100644 --- a/indra/newview/lldrawpoolwater.cpp +++ b/indra/newview/lldrawpoolwater.cpp @@ -407,8 +407,8 @@ void LLDrawPoolWater::renderOpaqueLegacyWater() } else { - shader->uniform4fv("object_plane_s", 1, tp0); - shader->uniform4fv("object_plane_t", 1, tp1); + shader->uniform4fv(LLShaderMgr::OBJECT_PLANE_S, 1, tp0); + shader->uniform4fv(LLShaderMgr::OBJECT_PLANE_T, 1, tp1); } gGL.diffuseColor3f(1.f, 1.f, 1.f); @@ -546,7 +546,7 @@ void LLDrawPoolWater::shade() sTime = (F32)LLFrameTimer::getElapsedSeconds()*0.5f; - S32 reftex = shader->enableTexture(LLViewerShaderMgr::WATER_REFTEX); + S32 reftex = shader->enableTexture(LLShaderMgr::WATER_REFTEX); if (reftex > -1) { @@ -577,12 +577,12 @@ void LLDrawPoolWater::shade() mWaterNormp->setFilteringOption(LLTexUnit::TFO_POINT); } - S32 screentex = shader->enableTexture(LLViewerShaderMgr::WATER_SCREENTEX); + S32 screentex = shader->enableTexture(LLShaderMgr::WATER_SCREENTEX); if (screentex > -1) { - shader->uniform4fv(LLViewerShaderMgr::WATER_FOGCOLOR, 1, sWaterFogColor.mV); - shader->uniform1f(LLViewerShaderMgr::WATER_FOGDENSITY, + shader->uniform4fv(LLShaderMgr::WATER_FOGCOLOR, 1, sWaterFogColor.mV); + shader->uniform1f(LLShaderMgr::WATER_FOGDENSITY, param_mgr->getFogDensity()); gPipeline.mWaterDis.bindTexture(0, screentex); } @@ -594,15 +594,9 @@ void LLDrawPoolWater::shade() if (mVertexShaderLevel == 1) { sWaterFogColor.mV[3] = param_mgr->mDensitySliderValue; - shader->uniform4fv(LLViewerShaderMgr::WATER_FOGCOLOR, 1, sWaterFogColor.mV); + shader->uniform4fv(LLShaderMgr::WATER_FOGCOLOR, 1, sWaterFogColor.mV); } - F32 screenRes[] = - { - 1.f/gGLViewport[2], - 1.f/gGLViewport[3] - }; - shader->uniform2fv("screenRes", 1, screenRes); stop_glerror(); S32 diffTex = shader->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP); @@ -614,26 +608,26 @@ void LLDrawPoolWater::shade() light_diffuse *= 6.f; //shader->uniformMatrix4fv("inverse_ref", 1, GL_FALSE, (GLfloat*) gGLObliqueProjectionInverse.mMatrix); - shader->uniform1f(LLViewerShaderMgr::WATER_WATERHEIGHT, eyedepth); - shader->uniform1f(LLViewerShaderMgr::WATER_TIME, sTime); - shader->uniform3fv(LLViewerShaderMgr::WATER_EYEVEC, 1, LLViewerCamera::getInstance()->getOrigin().mV); - shader->uniform3fv(LLViewerShaderMgr::WATER_SPECULAR, 1, light_diffuse.mV); - shader->uniform1f(LLViewerShaderMgr::WATER_SPECULAR_EXP, light_exp); - shader->uniform2fv(LLViewerShaderMgr::WATER_WAVE_DIR1, 1, param_mgr->getWave1Dir().mV); - shader->uniform2fv(LLViewerShaderMgr::WATER_WAVE_DIR2, 1, param_mgr->getWave2Dir().mV); - shader->uniform3fv(LLViewerShaderMgr::WATER_LIGHT_DIR, 1, light_dir.mV); - - shader->uniform3fv("normScale", 1, param_mgr->getNormalScale().mV); - shader->uniform1f("fresnelScale", param_mgr->getFresnelScale()); - shader->uniform1f("fresnelOffset", param_mgr->getFresnelOffset()); - shader->uniform1f("blurMultiplier", param_mgr->getBlurMultiplier()); + shader->uniform1f(LLShaderMgr::WATER_WATERHEIGHT, eyedepth); + shader->uniform1f(LLShaderMgr::WATER_TIME, sTime); + shader->uniform3fv(LLShaderMgr::WATER_EYEVEC, 1, LLViewerCamera::getInstance()->getOrigin().mV); + shader->uniform3fv(LLShaderMgr::WATER_SPECULAR, 1, light_diffuse.mV); + shader->uniform1f(LLShaderMgr::WATER_SPECULAR_EXP, light_exp); + shader->uniform2fv(LLShaderMgr::WATER_WAVE_DIR1, 1, param_mgr->getWave1Dir().mV); + shader->uniform2fv(LLShaderMgr::WATER_WAVE_DIR2, 1, param_mgr->getWave2Dir().mV); + shader->uniform3fv(LLShaderMgr::WATER_LIGHT_DIR, 1, light_dir.mV); + + shader->uniform3fv(LLShaderMgr::WATER_NORM_SCALE, 1, param_mgr->getNormalScale().mV); + shader->uniform1f(LLShaderMgr::WATER_FRESNEL_SCALE, param_mgr->getFresnelScale()); + shader->uniform1f(LLShaderMgr::WATER_FRESNEL_OFFSET, param_mgr->getFresnelOffset()); + shader->uniform1f(LLShaderMgr::WATER_BLUR_MULTIPLIER, param_mgr->getBlurMultiplier()); F32 sunAngle = llmax(0.f, light_dir.mV[2]); F32 scaledAngle = 1.f - sunAngle; - shader->uniform1f("sunAngle", sunAngle); - shader->uniform1f("scaledAngle", scaledAngle); - shader->uniform1f("sunAngle2", 0.1f + 0.2f*sunAngle); + shader->uniform1f(LLShaderMgr::WATER_SUN_ANGLE, sunAngle); + shader->uniform1f(LLShaderMgr::WATER_SCALED_ANGLE, scaledAngle); + shader->uniform1f(LLShaderMgr::WATER_SUN_ANGLE2, 0.1f + 0.2f*sunAngle); LLColor4 water_color; LLVector3 camera_up = LLViewerCamera::getInstance()->getUpAxis(); @@ -641,12 +635,12 @@ void LLDrawPoolWater::shade() if (LLViewerCamera::getInstance()->cameraUnderWater()) { water_color.setVec(1.f, 1.f, 1.f, 0.4f); - shader->uniform1f(LLViewerShaderMgr::WATER_REFSCALE, param_mgr->getScaleBelow()); + shader->uniform1f(LLShaderMgr::WATER_REFSCALE, param_mgr->getScaleBelow()); } else { water_color.setVec(1.f, 1.f, 1.f, 0.5f*(1.f + up_dot)); - shader->uniform1f(LLViewerShaderMgr::WATER_REFSCALE, param_mgr->getScaleAbove()); + shader->uniform1f(LLShaderMgr::WATER_REFSCALE, param_mgr->getScaleAbove()); } if (water_color.mV[3] > 0.9f) @@ -690,11 +684,11 @@ void LLDrawPoolWater::shade() } shader->disableTexture(LLViewerShaderMgr::ENVIRONMENT_MAP, LLTexUnit::TT_CUBE_MAP); - shader->disableTexture(LLViewerShaderMgr::WATER_SCREENTEX); + shader->disableTexture(LLShaderMgr::WATER_SCREENTEX); shader->disableTexture(LLViewerShaderMgr::BUMP_MAP); shader->disableTexture(LLViewerShaderMgr::DIFFUSE_MAP); - shader->disableTexture(LLViewerShaderMgr::WATER_REFTEX); - shader->disableTexture(LLViewerShaderMgr::WATER_SCREENDEPTH); + shader->disableTexture(LLShaderMgr::WATER_REFTEX); + shader->disableTexture(LLShaderMgr::WATER_SCREENDEPTH); if (deferred_render) { diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 6b3127decf..ef91a459e7 100755 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -1407,6 +1407,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, { //use transform feedback to pack vertex buffer //gGLDebugLoggingEnabled = TRUE; LLFastTimer t(FTM_FACE_GEOM_FEEDBACK); + LLGLEnable discard(GL_RASTERIZER_DISCARD); LLVertexBuffer* buff = (LLVertexBuffer*) vf.mVertexBuffer.get(); if (vf.mVertexBuffer.isNull() || buff->getNumVerts() != vf.mNumVertices) @@ -1955,21 +1956,31 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (rebuild_pos) { - LLFastTimer t(FTM_FACE_GEOM_POSITION); + LLVector4a* src = vf.mPositions; + + //_mm_prefetch((char*)src, _MM_HINT_T0); + + LLVector4a* end = src+num_vertices; + //LLVector4a* end_64 = end-4; + + //LLFastTimer t(FTM_FACE_GEOM_POSITION); llassert(num_vertices > 0); mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range); - LLMatrix4a mat_vert; mat_vert.loadu(mat_vert_in); + + F32* dst = (F32*) vert.get(); + F32* end_f32 = dst+mGeomCount*4; - LLVector4a* src = vf.mPositions; - volatile F32* dst = (volatile F32*) vert.get(); - - volatile F32* end = dst+num_vertices*4; - LLVector4a res; + //_mm_prefetch((char*)dst, _MM_HINT_NTA); + //_mm_prefetch((char*)src, _MM_HINT_NTA); + + //_mm_prefetch((char*)dst, _MM_HINT_NTA); + LLVector4a res0; //,res1,res2,res3; + LLVector4a texIdx; S32 index = mTextureIndex < 255 ? mTextureIndex : 0; @@ -1986,29 +1997,53 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, texIdx.set(0,0,0,val); + LLVector4a tmp; + { - LLFastTimer t(FTM_FACE_POSITION_STORE); - LLVector4a tmp; + //LLFastTimer t2(FTM_FACE_POSITION_STORE); - do - { - mat_vert.affineTransform(*src++, res); - tmp.setSelectWithMask(mask, texIdx, res); + /*if (num_vertices > 4) + { //more than 64 bytes + while (src < end_64) + { + _mm_prefetch((char*)src + 64, _MM_HINT_T0); + _mm_prefetch((char*)dst + 64, _MM_HINT_T0); + + mat_vert.affineTransform(*src, res0); + tmp.setSelectWithMask(mask, texIdx, res0); + tmp.store4a((F32*) dst); + + mat_vert.affineTransform(*(src+1), res1); + tmp.setSelectWithMask(mask, texIdx, res1); + tmp.store4a((F32*) dst+4); + + mat_vert.affineTransform(*(src+2), res2); + tmp.setSelectWithMask(mask, texIdx, res2); + tmp.store4a((F32*) dst+8); + + mat_vert.affineTransform(*(src+3), res3); + tmp.setSelectWithMask(mask, texIdx, res3); + tmp.store4a((F32*) dst+12); + + dst += 16; + src += 4; + } + }*/ + + while (src < end) + { + mat_vert.affineTransform(*src++, res0); + tmp.setSelectWithMask(mask, texIdx, res0); tmp.store4a((F32*) dst); dst += 4; } - while(dst < end); } - + { - LLFastTimer t(FTM_FACE_POSITION_PAD); - S32 aligned_pad_vertices = mGeomCount - num_vertices; - res.set(res[0], res[1], res[2], 0.f); - - while (aligned_pad_vertices > 0) + //LLFastTimer t(FTM_FACE_POSITION_PAD); + while (dst < end_f32) { - --aligned_pad_vertices; - res.store4a((F32*) dst); + res0.store4a((F32*) dst); dst += 4; } } @@ -2022,15 +2057,17 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (rebuild_normal) { - LLFastTimer t(FTM_FACE_GEOM_NORMAL); + //LLFastTimer t(FTM_FACE_GEOM_NORMAL); mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range); F32* normals = (F32*) norm.get(); - for (S32 i = 0; i < num_vertices; i++) - { + LLVector4a* src = vf.mNormals; + LLVector4a* end = src+num_vertices; + + while (src < end) + { LLVector4a normal; - mat_normal.rotate(vf.mNormals[i], normal); - normal.normalize3fast(); + mat_normal.rotate(*src++, normal); normal.store4a(normals); normals += 4; } @@ -2047,11 +2084,13 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range); F32* binormals = (F32*) binorm.get(); - for (S32 i = 0; i < num_vertices; i++) + LLVector4a* src = vf.mBinormals; + LLVector4a* end = vf.mBinormals+num_vertices; + + while (src < end) { LLVector4a binormal; - mat_normal.rotate(vf.mBinormals[i], binormal); - binormal.normalize3fast(); + mat_normal.rotate(*src++, binormal); binormal.store4a(binormals); binormals += 4; } diff --git a/indra/newview/llfasttimerview.cpp b/indra/newview/llfasttimerview.cpp index 4dfb93f1bc..e7a3f9b390 100644 --- a/indra/newview/llfasttimerview.cpp +++ b/indra/newview/llfasttimerview.cpp @@ -345,7 +345,7 @@ BOOL LLFastTimerView::handleScrollWheel(S32 x, S32 y, S32 clicks) return TRUE; } -static LLFastTimer::DeclareTimer FTM_RENDER_TIMER("Timers", true); +static LLFastTimer::DeclareTimer FTM_RENDER_TIMER("Timers"); static std::map sTimerColors; diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index ffeea2f4df..9ffc64312d 100644 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -212,13 +212,13 @@ void display_stats() } static LLFastTimer::DeclareTimer FTM_PICK("Picking"); -static LLFastTimer::DeclareTimer FTM_RENDER("Render", true); +static LLFastTimer::DeclareTimer FTM_RENDER("Render"); static LLFastTimer::DeclareTimer FTM_UPDATE_SKY("Update Sky"); static LLFastTimer::DeclareTimer FTM_UPDATE_TEXTURES("Update Textures"); static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE("Update Images"); static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_CLASS("Class"); static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_BUMP("Image Update Bump"); -static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_LIST("List"); +static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_LIST("List", true); static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_DELETE("Delete"); static LLFastTimer::DeclareTimer FTM_RESIZE_WINDOW("Resize Window"); static LLFastTimer::DeclareTimer FTM_HUD_UPDATE("HUD Update"); diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp index ba9818946c..c7677759af 100644 --- a/indra/newview/llviewershadermgr.cpp +++ b/indra/newview/llviewershadermgr.cpp @@ -304,47 +304,6 @@ void LLViewerShaderMgr::initAttribsAndUniforms(void) if (mReservedAttribs.empty()) { LLShaderMgr::initAttribsAndUniforms(); - - mAvatarUniforms.push_back("matrixPalette"); - mAvatarUniforms.push_back("gWindDir"); - mAvatarUniforms.push_back("gSinWaveParams"); - mAvatarUniforms.push_back("gGravity"); - - mWLUniforms.push_back("camPosLocal"); - - mTerrainUniforms.reserve(5); - mTerrainUniforms.push_back("detail_0"); - mTerrainUniforms.push_back("detail_1"); - mTerrainUniforms.push_back("detail_2"); - mTerrainUniforms.push_back("detail_3"); - mTerrainUniforms.push_back("alpha_ramp"); - - mGlowUniforms.push_back("glowDelta"); - mGlowUniforms.push_back("glowStrength"); - - mGlowExtractUniforms.push_back("minLuminance"); - mGlowExtractUniforms.push_back("maxExtractAlpha"); - mGlowExtractUniforms.push_back("lumWeights"); - mGlowExtractUniforms.push_back("warmthWeights"); - mGlowExtractUniforms.push_back("warmthAmount"); - - mShinyUniforms.push_back("origin"); - - mWaterUniforms.reserve(12); - mWaterUniforms.push_back("screenTex"); - mWaterUniforms.push_back("screenDepth"); - mWaterUniforms.push_back("refTex"); - mWaterUniforms.push_back("eyeVec"); - mWaterUniforms.push_back("time"); - mWaterUniforms.push_back("d1"); - mWaterUniforms.push_back("d2"); - mWaterUniforms.push_back("lightDir"); - mWaterUniforms.push_back("specular"); - mWaterUniforms.push_back("lightExp"); - mWaterUniforms.push_back("fogCol"); - mWaterUniforms.push_back("kd"); - mWaterUniforms.push_back("refScale"); - mWaterUniforms.push_back("waterHeight"); } } @@ -915,7 +874,7 @@ BOOL LLViewerShaderMgr::loadShadersEnvironment() gTerrainProgram.mShaderFiles.push_back(make_pair("environment/terrainV.glsl", GL_VERTEX_SHADER_ARB)); gTerrainProgram.mShaderFiles.push_back(make_pair("environment/terrainF.glsl", GL_FRAGMENT_SHADER_ARB)); gTerrainProgram.mShaderLevel = mVertexShaderLevel[SHADER_ENVIRONMENT]; - success = gTerrainProgram.createShader(NULL, &mTerrainUniforms); + success = gTerrainProgram.createShader(NULL, NULL); } if (!success) @@ -953,7 +912,7 @@ BOOL LLViewerShaderMgr::loadShadersWater() gWaterProgram.mShaderFiles.push_back(make_pair("environment/waterV.glsl", GL_VERTEX_SHADER_ARB)); gWaterProgram.mShaderFiles.push_back(make_pair("environment/waterF.glsl", GL_FRAGMENT_SHADER_ARB)); gWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_WATER]; - success = gWaterProgram.createShader(NULL, &mWaterUniforms); + success = gWaterProgram.createShader(NULL, NULL); } if (success) @@ -967,7 +926,7 @@ BOOL LLViewerShaderMgr::loadShadersWater() gUnderWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_WATER]; gUnderWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gUnderWaterProgram.createShader(NULL, &mWaterUniforms); + success = gUnderWaterProgram.createShader(NULL, NULL); } if (success) @@ -985,7 +944,7 @@ BOOL LLViewerShaderMgr::loadShadersWater() gTerrainWaterProgram.mShaderFiles.push_back(make_pair("environment/terrainWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gTerrainWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_ENVIRONMENT]; gTerrainWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - terrainWaterSuccess = gTerrainWaterProgram.createShader(NULL, &mTerrainUniforms); + terrainWaterSuccess = gTerrainWaterProgram.createShader(NULL, NULL); } /// Keep track of water shader levels @@ -1034,7 +993,7 @@ BOOL LLViewerShaderMgr::loadShadersEffects() gGlowProgram.mShaderFiles.push_back(make_pair("effects/glowV.glsl", GL_VERTEX_SHADER_ARB)); gGlowProgram.mShaderFiles.push_back(make_pair("effects/glowF.glsl", GL_FRAGMENT_SHADER_ARB)); gGlowProgram.mShaderLevel = mVertexShaderLevel[SHADER_EFFECT]; - success = gGlowProgram.createShader(NULL, &mGlowUniforms); + success = gGlowProgram.createShader(NULL, NULL); if (!success) { LLPipeline::sRenderGlow = FALSE; @@ -1048,7 +1007,7 @@ BOOL LLViewerShaderMgr::loadShadersEffects() gGlowExtractProgram.mShaderFiles.push_back(make_pair("effects/glowExtractV.glsl", GL_VERTEX_SHADER_ARB)); gGlowExtractProgram.mShaderFiles.push_back(make_pair("effects/glowExtractF.glsl", GL_FRAGMENT_SHADER_ARB)); gGlowExtractProgram.mShaderLevel = mVertexShaderLevel[SHADER_EFFECT]; - success = gGlowExtractProgram.createShader(NULL, &mGlowExtractUniforms); + success = gGlowExtractProgram.createShader(NULL, NULL); if (!success) { LLPipeline::sRenderGlow = FALSE; @@ -1408,7 +1367,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredWaterProgram.mShaderFiles.push_back(make_pair("deferred/waterV.glsl", GL_VERTEX_SHADER_ARB)); gDeferredWaterProgram.mShaderFiles.push_back(make_pair("deferred/waterF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredWaterProgram.createShader(NULL, &mWaterUniforms); + success = gDeferredWaterProgram.createShader(NULL, NULL); } if (success) @@ -1467,7 +1426,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredAvatarShadowProgram.mShaderFiles.push_back(make_pair("deferred/avatarShadowV.glsl", GL_VERTEX_SHADER_ARB)); gDeferredAvatarShadowProgram.mShaderFiles.push_back(make_pair("deferred/avatarShadowF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredAvatarShadowProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredAvatarShadowProgram.createShader(NULL, &mAvatarUniforms); + success = gDeferredAvatarShadowProgram.createShader(NULL, NULL); } if (success) @@ -1488,7 +1447,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredTerrainProgram.mShaderFiles.push_back(make_pair("deferred/terrainV.glsl", GL_VERTEX_SHADER_ARB)); gDeferredTerrainProgram.mShaderFiles.push_back(make_pair("deferred/terrainF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredTerrainProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredTerrainProgram.createShader(NULL, &mTerrainUniforms); + success = gDeferredTerrainProgram.createShader(NULL, NULL); } if (success) @@ -1499,7 +1458,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredAvatarProgram.mShaderFiles.push_back(make_pair("deferred/avatarV.glsl", GL_VERTEX_SHADER_ARB)); gDeferredAvatarProgram.mShaderFiles.push_back(make_pair("deferred/avatarF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredAvatarProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredAvatarProgram.createShader(NULL, &mAvatarUniforms); + success = gDeferredAvatarProgram.createShader(NULL, NULL); } if (success) @@ -1519,7 +1478,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredAvatarAlphaProgram.mShaderFiles.push_back(make_pair("deferred/alphaNonIndexedNoColorF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredAvatarAlphaProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredAvatarAlphaProgram.createShader(NULL, &mAvatarUniforms); + success = gDeferredAvatarAlphaProgram.createShader(NULL, NULL); gDeferredAvatarAlphaProgram.mFeatures.calculatesLighting = true; gDeferredAvatarAlphaProgram.mFeatures.hasLighting = true; @@ -1584,7 +1543,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredWLSkyProgram.mShaderFiles.push_back(make_pair("deferred/skyF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredWLSkyProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; gDeferredWLSkyProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gDeferredWLSkyProgram.createShader(NULL, &mWLUniforms); + success = gDeferredWLSkyProgram.createShader(NULL, NULL); } if (success) @@ -1595,7 +1554,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredWLCloudProgram.mShaderFiles.push_back(make_pair("deferred/cloudsF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredWLCloudProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; gDeferredWLCloudProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gDeferredWLCloudProgram.createShader(NULL, &mWLUniforms); + success = gDeferredWLCloudProgram.createShader(NULL, NULL); } if (success) @@ -1606,7 +1565,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredStarProgram.mShaderFiles.push_back(make_pair("deferred/starsF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredStarProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; gDeferredStarProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gDeferredStarProgram.createShader(NULL, &mWLUniforms); + success = gDeferredStarProgram.createShader(NULL, NULL); } if (success) @@ -1957,7 +1916,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectShinyNonIndexedProgram.mShaderFiles.push_back(make_pair("objects/shinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectShinyNonIndexedProgram.mShaderFiles.push_back(make_pair("objects/shinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectShinyNonIndexedProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gObjectShinyNonIndexedProgram.createShader(NULL, &mShinyUniforms); + success = gObjectShinyNonIndexedProgram.createShader(NULL, NULL); } if (success) @@ -1974,7 +1933,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectShinyNonIndexedWaterProgram.mShaderFiles.push_back(make_pair("objects/shinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectShinyNonIndexedWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; gObjectShinyNonIndexedWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gObjectShinyNonIndexedWaterProgram.createShader(NULL, &mShinyUniforms); + success = gObjectShinyNonIndexedWaterProgram.createShader(NULL, NULL); } if (success) @@ -1990,7 +1949,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectFullbrightShinyNonIndexedProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectFullbrightShinyNonIndexedProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectFullbrightShinyNonIndexedProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gObjectFullbrightShinyNonIndexedProgram.createShader(NULL, &mShinyUniforms); + success = gObjectFullbrightShinyNonIndexedProgram.createShader(NULL, NULL); } if (success) @@ -2008,7 +1967,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectFullbrightShinyNonIndexedWaterProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectFullbrightShinyNonIndexedWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; gObjectFullbrightShinyNonIndexedWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gObjectFullbrightShinyNonIndexedWaterProgram.createShader(NULL, &mShinyUniforms); + success = gObjectFullbrightShinyNonIndexedWaterProgram.createShader(NULL, NULL); } if (success) @@ -2087,7 +2046,6 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectBumpProgram.mShaderFiles.push_back(make_pair("objects/bumpF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectBumpProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; success = gObjectBumpProgram.createShader(NULL, NULL); - if (success) { //lldrawpoolbump assumes "texture0" has channel 0 and "texture1" has channel 1 gObjectBumpProgram.bind(); @@ -2241,7 +2199,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectShinyProgram.mShaderFiles.push_back(make_pair("objects/shinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectShinyProgram.mShaderFiles.push_back(make_pair("objects/shinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectShinyProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gObjectShinyProgram.createShader(NULL, &mShinyUniforms); + success = gObjectShinyProgram.createShader(NULL, NULL); } if (success) @@ -2258,7 +2216,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectShinyWaterProgram.mShaderFiles.push_back(make_pair("objects/shinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectShinyWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; gObjectShinyWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gObjectShinyWaterProgram.createShader(NULL, &mShinyUniforms); + success = gObjectShinyWaterProgram.createShader(NULL, NULL); } if (success) @@ -2274,7 +2232,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectFullbrightShinyProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectFullbrightShinyProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectFullbrightShinyProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gObjectFullbrightShinyProgram.createShader(NULL, &mShinyUniforms); + success = gObjectFullbrightShinyProgram.createShader(NULL, NULL); } if (success) @@ -2292,7 +2250,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectFullbrightShinyWaterProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectFullbrightShinyWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; gObjectFullbrightShinyWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gObjectFullbrightShinyWaterProgram.createShader(NULL, &mShinyUniforms); + success = gObjectFullbrightShinyWaterProgram.createShader(NULL, NULL); } if (mVertexShaderLevel[SHADER_AVATAR] > 0) @@ -2377,7 +2335,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gSkinnedObjectFullbrightShinyProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinySkinnedV.glsl", GL_VERTEX_SHADER_ARB)); gSkinnedObjectFullbrightShinyProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gSkinnedObjectFullbrightShinyProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gSkinnedObjectFullbrightShinyProgram.createShader(NULL, &mShinyUniforms); + success = gSkinnedObjectFullbrightShinyProgram.createShader(NULL, NULL); } if (success) @@ -2394,7 +2352,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gSkinnedObjectShinySimpleProgram.mShaderFiles.push_back(make_pair("objects/shinySimpleSkinnedV.glsl", GL_VERTEX_SHADER_ARB)); gSkinnedObjectShinySimpleProgram.mShaderFiles.push_back(make_pair("objects/shinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gSkinnedObjectShinySimpleProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gSkinnedObjectShinySimpleProgram.createShader(NULL, &mShinyUniforms); + success = gSkinnedObjectShinySimpleProgram.createShader(NULL, NULL); } if (success) @@ -2451,7 +2409,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gSkinnedObjectFullbrightShinyWaterProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinySkinnedV.glsl", GL_VERTEX_SHADER_ARB)); gSkinnedObjectFullbrightShinyWaterProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gSkinnedObjectFullbrightShinyWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gSkinnedObjectFullbrightShinyWaterProgram.createShader(NULL, &mShinyUniforms); + success = gSkinnedObjectFullbrightShinyWaterProgram.createShader(NULL, NULL); } if (success) @@ -2470,7 +2428,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gSkinnedObjectShinySimpleWaterProgram.mShaderFiles.push_back(make_pair("objects/shinySimpleSkinnedV.glsl", GL_VERTEX_SHADER_ARB)); gSkinnedObjectShinySimpleWaterProgram.mShaderFiles.push_back(make_pair("objects/shinyWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gSkinnedObjectShinySimpleWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gSkinnedObjectShinySimpleWaterProgram.createShader(NULL, &mShinyUniforms); + success = gSkinnedObjectShinySimpleWaterProgram.createShader(NULL, NULL); } } @@ -2511,7 +2469,7 @@ BOOL LLViewerShaderMgr::loadShadersAvatar() gAvatarProgram.mShaderFiles.push_back(make_pair("avatar/avatarV.glsl", GL_VERTEX_SHADER_ARB)); gAvatarProgram.mShaderFiles.push_back(make_pair("avatar/avatarF.glsl", GL_FRAGMENT_SHADER_ARB)); gAvatarProgram.mShaderLevel = mVertexShaderLevel[SHADER_AVATAR]; - success = gAvatarProgram.createShader(NULL, &mAvatarUniforms); + success = gAvatarProgram.createShader(NULL, NULL); if (success) { @@ -2530,7 +2488,7 @@ BOOL LLViewerShaderMgr::loadShadersAvatar() // Note: no cloth under water: gAvatarWaterProgram.mShaderLevel = llmin(mVertexShaderLevel[SHADER_AVATAR], 1); gAvatarWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gAvatarWaterProgram.createShader(NULL, &mAvatarUniforms); + success = gAvatarWaterProgram.createShader(NULL, NULL); } /// Keep track of avatar levels @@ -2549,7 +2507,7 @@ BOOL LLViewerShaderMgr::loadShadersAvatar() gAvatarPickProgram.mShaderFiles.push_back(make_pair("avatar/pickAvatarV.glsl", GL_VERTEX_SHADER_ARB)); gAvatarPickProgram.mShaderFiles.push_back(make_pair("avatar/pickAvatarF.glsl", GL_FRAGMENT_SHADER_ARB)); gAvatarPickProgram.mShaderLevel = mVertexShaderLevel[SHADER_AVATAR]; - success = gAvatarPickProgram.createShader(NULL, &mAvatarUniforms); + success = gAvatarPickProgram.createShader(NULL, NULL); } if (success) @@ -2817,7 +2775,7 @@ BOOL LLViewerShaderMgr::loadShadersWindLight() gWLSkyProgram.mShaderFiles.push_back(make_pair("windlight/skyF.glsl", GL_FRAGMENT_SHADER_ARB)); gWLSkyProgram.mShaderLevel = mVertexShaderLevel[SHADER_WINDLIGHT]; gWLSkyProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gWLSkyProgram.createShader(NULL, &mWLUniforms); + success = gWLSkyProgram.createShader(NULL, NULL); } if (success) @@ -2829,7 +2787,7 @@ BOOL LLViewerShaderMgr::loadShadersWindLight() gWLCloudProgram.mShaderFiles.push_back(make_pair("windlight/cloudsF.glsl", GL_FRAGMENT_SHADER_ARB)); gWLCloudProgram.mShaderLevel = mVertexShaderLevel[SHADER_WINDLIGHT]; gWLCloudProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gWLCloudProgram.createShader(NULL, &mWLUniforms); + success = gWLCloudProgram.createShader(NULL, NULL); } return success; diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h index d6dd645e8c..b8552d2d95 100644 --- a/indra/newview/llviewershadermgr.h +++ b/indra/newview/llviewershadermgr.h @@ -74,56 +74,7 @@ public: SHADER_COUNT }; - typedef enum - { - SHINY_ORIGIN = END_RESERVED_UNIFORMS - } eShinyUniforms; - - typedef enum - { - WATER_SCREENTEX = END_RESERVED_UNIFORMS, - WATER_SCREENDEPTH, - WATER_REFTEX, - WATER_EYEVEC, - WATER_TIME, - WATER_WAVE_DIR1, - WATER_WAVE_DIR2, - WATER_LIGHT_DIR, - WATER_SPECULAR, - WATER_SPECULAR_EXP, - WATER_FOGCOLOR, - WATER_FOGDENSITY, - WATER_REFSCALE, - WATER_WATERHEIGHT, - } eWaterUniforms; - - typedef enum - { - WL_CAMPOSLOCAL = END_RESERVED_UNIFORMS, - WL_WATERHEIGHT - } eWLUniforms; - - typedef enum - { - TERRAIN_DETAIL0 = END_RESERVED_UNIFORMS, - TERRAIN_DETAIL1, - TERRAIN_DETAIL2, - TERRAIN_DETAIL3, - TERRAIN_ALPHARAMP - } eTerrainUniforms; - - typedef enum - { - GLOW_DELTA = END_RESERVED_UNIFORMS - } eGlowUniforms; - - typedef enum - { - AVATAR_MATRIX = END_RESERVED_UNIFORMS, - AVATAR_WIND, - AVATAR_SINWAVE, - AVATAR_GRAVITY, - } eAvatarUniforms; + // simple model of forward iterator // http://www.sgi.com/tech/stl/ForwardIterator.html @@ -176,24 +127,6 @@ public: /* virtual */ void updateShaderUniforms(LLGLSLShader * shader); private: - - std::vector mShinyUniforms; - - //water parameters - std::vector mWaterUniforms; - - std::vector mWLUniforms; - - //terrain parameters - std::vector mTerrainUniforms; - - //glow parameters - std::vector mGlowUniforms; - - std::vector mGlowExtractUniforms; - - std::vector mAvatarUniforms; - // the list of shaders we need to propagate parameters to. std::vector mShaderList; diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 6a18534484..b0f23fca42 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -4654,7 +4654,7 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) bump_mask |= LLVertexBuffer::MAP_BINORMAL; genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, FALSE, TRUE); genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, FALSE, TRUE); - genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, TRUE); + genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, FALSE); genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, TRUE, TRUE); } else diff --git a/indra/newview/llwaterparammanager.cpp b/indra/newview/llwaterparammanager.cpp index 4f52ff9778..548890b5b5 100644 --- a/indra/newview/llwaterparammanager.cpp +++ b/indra/newview/llwaterparammanager.cpp @@ -188,13 +188,11 @@ void LLWaterParamManager::updateShaderUniforms(LLGLSLShader * shader) if (shader->mShaderGroup == LLGLSLShader::SG_WATER) { shader->uniform4fv(LLViewerShaderMgr::LIGHTNORM, 1, LLWLParamManager::getInstance()->getRotatedLightDir().mV); - shader->uniform3fv("camPosLocal", 1, LLViewerCamera::getInstance()->getOrigin().mV); - shader->uniform4fv("waterFogColor", 1, LLDrawPoolWater::sWaterFogColor.mV); - shader->uniform1f("waterFogEnd", LLDrawPoolWater::sWaterFogEnd); - shader->uniform4fv("waterPlane", 1, mWaterPlane.mV); - shader->uniform1f("waterFogDensity", getFogDensity()); - shader->uniform1f("waterFogKS", mWaterFogKS); - shader->uniform1f("distance_multiplier", 0); + shader->uniform4fv(LLShaderMgr::WATER_FOGCOLOR, 1, LLDrawPoolWater::sWaterFogColor.mV); + shader->uniform4fv(LLShaderMgr::WATER_WATERPLANE, 1, mWaterPlane.mV); + shader->uniform1f(LLShaderMgr::WATER_FOGDENSITY, getFogDensity()); + shader->uniform1f(LLShaderMgr::WATER_FOGKS, mWaterFogKS); + shader->uniform1f(LLViewerShaderMgr::DISTANCE_MULTIPLIER, 0); } } diff --git a/indra/newview/llwlparammanager.cpp b/indra/newview/llwlparammanager.cpp index 6077208799..04d41a2512 100644 --- a/indra/newview/llwlparammanager.cpp +++ b/indra/newview/llwlparammanager.cpp @@ -352,7 +352,7 @@ void LLWLParamManager::updateShaderUniforms(LLGLSLShader * shader) if (shader->mShaderGroup == LLGLSLShader::SG_DEFAULT) { shader->uniform4fv(LLViewerShaderMgr::LIGHTNORM, 1, mRotatedLightDir.mV); - shader->uniform3fv("camPosLocal", 1, LLViewerCamera::getInstance()->getOrigin().mV); + shader->uniform3fv(LLShaderMgr::WL_CAMPOSLOCAL, 1, LLViewerCamera::getInstance()->getOrigin().mV); } else if (shader->mShaderGroup == LLGLSLShader::SG_SKY) @@ -360,7 +360,7 @@ void LLWLParamManager::updateShaderUniforms(LLGLSLShader * shader) shader->uniform4fv(LLViewerShaderMgr::LIGHTNORM, 1, mClampedLightDir.mV); } - shader->uniform1f("scene_light_strength", mSceneLightStrength); + shader->uniform1f(LLShaderMgr::SCENE_LIGHT_STRENGTH, mSceneLightStrength); } diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 45d6d23b51..d9771af254 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -7857,13 +7857,6 @@ void LLPipeline::bindDeferredShader(LLGLSLShader& shader, U32 light_index, U32 n shader.uniform2f(LLShaderMgr::DEFERRED_PROJ_SHADOW_RES, mShadow[4].getWidth(), mShadow[4].getHeight()); shader.uniform1f(LLShaderMgr::DEFERRED_DEPTH_CUTOFF, RenderEdgeDepthCutoff); shader.uniform1f(LLShaderMgr::DEFERRED_NORM_CUTOFF, RenderEdgeNormCutoff); - - - if (shader.getUniformLocation("norm_mat") >= 0) - { - glh::matrix4f norm_mat = glh_get_current_modelview().inverse().transpose(); - shader.uniformMatrix4fv("norm_mat", 1, FALSE, norm_mat.m); - } } static LLFastTimer::DeclareTimer FTM_GI_TRACE("Trace"); @@ -7973,8 +7966,7 @@ void LLPipeline::renderDeferredLighting() } gDeferredSunProgram.uniform3fv("offset", slice, offset); - gDeferredSunProgram.uniform2f("screenRes", mDeferredLight.getWidth(), mDeferredLight.getHeight()); - + { LLGLDisable blend(GL_BLEND); LLGLDepthTest depth(GL_TRUE, GL_FALSE, GL_ALWAYS); -- cgit v1.3 From 1816582b929737f92ee68a1422e3be4e7c02f542 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Wed, 6 Mar 2013 09:09:07 -0800 Subject: Fix crashes from using single alloc for pos/norm/tc in volume face data fighting with old free call in model loading code --- indra/llmath/llvolume.cpp | 11 ++++++----- indra/llprimitive/llmodel.cpp | 31 ++++++++++++++++++------------- 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f989e8ed17..f503eea107 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5307,7 +5307,7 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately @@ -5492,10 +5492,11 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + if (new_face.mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index 5ed05e2201..8f0120b064 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -194,6 +194,9 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa cv.setPosition(LLVector4a(v[idx[i+pos_offset]*3+0], v[idx[i+pos_offset]*3+1], v[idx[i+pos_offset]*3+2])); + + if (!cv.getPosition().isFinite3()) + return LLModel::BAD_ELEMENT; } if (tc_source) @@ -207,6 +210,8 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa cv.setNormal(LLVector4a(n[idx[i+norm_offset]*3+0], n[idx[i+norm_offset]*3+1], n[idx[i+norm_offset]*3+2])); + if (!cv.getNormal().isFinite3()) + return LLModel::BAD_ELEMENT; } BOOL found = FALSE; @@ -261,13 +266,13 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } @@ -292,13 +297,13 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -480,13 +485,13 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } @@ -514,13 +519,13 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -730,13 +735,13 @@ LLModel::EModelStatus load_face_from_dom_polygons(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!n) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!t) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -1036,7 +1041,7 @@ void LLModel::setVolumeFaceData( } else { - ll_aligned_free_16(face.mNormals); + //ll_aligned_free_16(face.mNormals); face.mNormals = NULL; } @@ -1047,7 +1052,7 @@ void LLModel::setVolumeFaceData( } else { - ll_aligned_free_16(face.mTexCoords); + //ll_aligned_free_16(face.mTexCoords); face.mTexCoords = NULL; } @@ -1246,7 +1251,7 @@ void LLModel::generateNormals(F32 angle_cutoff) } else { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } -- cgit v1.3 From f8e059deee28500b88c8c172eaa8c4d7ca657748 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 8 Mar 2013 17:11:30 -0600 Subject: MAINT-2371 Lat round of optimizations. Reviewed by Graham --- indra/llmath/llmatrix4a.h | 11 +- indra/llmath/llvector4a.inl | 13 +- indra/llmath/llvolume.cpp | 1791 +++++++++++++----------------------- indra/llmath/llvolume.h | 79 +- indra/newview/llflexibleobject.cpp | 18 +- indra/newview/llspatialpartition.h | 2 +- indra/newview/llvovolume.cpp | 157 ++-- 7 files changed, 824 insertions(+), 1247 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index c4cefdb4fa..d141298f69 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -107,15 +107,14 @@ public: inline void rotate(const LLVector4a& v, LLVector4a& res) { + LLVector4a y,z; + res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); - res.mul(mMatrix[0]); - - LLVector4a y; y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); - y.mul(mMatrix[1]); - - LLVector4a z; z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); + + res.mul(mMatrix[0]); + y.mul(mMatrix[1]); z.mul(mMatrix[2]); res.add(y); diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7c52ffef21..35a67204ec 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -460,16 +460,13 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs) mQ = _mm_max_ps(lhs.mQ, rhs.mQ); } -// Set this to (c * lhs) + rhs * ( 1 - c) +// Set this to lhs + (rhs-lhs)*c inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c) { - LLVector4a a = lhs; - a.mul(c); - - LLVector4a b = rhs; - b.mul(1.f-c); - - setAdd(a, b); + LLVector4a t; + t.setSub(rhs,lhs); + t.mul(c); + setAdd(lhs, t); } inline LLBool32 LLVector4a::isFinite3() const diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f989e8ed17..9fc72fd801 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -94,6 +94,95 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; +bool less_than_max_mag(const LLVector4a& vec); + +template +LLAlignedArray::LLAlignedArray() +{ + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +LLAlignedArray::~LLAlignedArray() +{ + ll_aligned_free(mArray); + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +void LLAlignedArray::push_back(const T& elem) +{ + T* old_buf = NULL; + if (mCapacity <= mElementCount) + { + mCapacity++; + mCapacity *= 2; + T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); + if (mArray) + { + LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); + } + old_buf = mArray; + mArray = new_buf; + } + + mArray[mElementCount++] = elem; + + //delete old array here to prevent error on a.push_back(a[0]) + ll_aligned_free(old_buf); +} + +template +void LLAlignedArray::resize(U32 size) +{ + if (mCapacity < size) + { + mCapacity = size+mCapacity*2; + T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; + if (mArray) + { + LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); + ll_aligned_free(mArray); + } + + /*for (U32 i = mElementCount; i < mCapacity; ++i) + { + new(new_buf+i) T(); + }*/ + mArray = new_buf; + } + + mElementCount = size; +} + + +template +T& LLAlignedArray::operator[](int idx) +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +const T& LLAlignedArray::operator[](int idx) const +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +T* LLAlignedArray::append(S32 N) +{ + U32 sz = size(); + resize(sz+N); + return &((*this)[sz]); +} + + BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -474,7 +563,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f }; F32 scale = 0.5f; F32 t, t_step, t_first, t_fraction, ang, ang_step; - LLVector3 pt1,pt2; + LLVector4a pt1,pt2; F32 begin = params.getBegin(); F32 end = params.getEnd(); @@ -497,20 +586,21 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // Starting t and ang values for the first face t = t_first; ang = 2.0f*F_PI*(t*ang_scale + offset); - pt1.setVec(cos(ang)*scale,sin(ang)*scale, t); + pt1.set(cos(ang)*scale,sin(ang)*scale, t); // Increment to the next point. // pt2 is the end point on the fractional face t += t_step; ang += ang_step; - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); t_fraction = (begin - t_first)*sides; // Only use if it's not almost exactly on an edge. if (t_fraction < 0.9999f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); mProfile.push_back(new_pt); } @@ -518,12 +608,17 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 while (t < end) { // Iterate through all the integer steps of t. - pt1.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt1.set(cos(ang)*scale,sin(ang)*scale,t); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + LLVector4a new_pt; + new_pt.setSub(pt1, p); + new_pt.mul(1.0f/(float)(split+1) * (float)(i+1)); + new_pt.add(p); + mProfile.push_back(new_pt); } } mProfile.push_back(pt1); @@ -536,18 +631,25 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // pt1 is the first point on the fractional face // pt2 is the end point on the fractional face - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); // Find the fraction that we need to add to the end point. t_fraction = (end - (t - t_step))*sides; if (t_fraction > 0.0001f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + + LLVector4a pt1; + pt1.setSub(new_pt, p); + pt1.mul(1.0f/(float)(split+1) * (float)(i+1)); + pt1.add(p); + mProfile.push_back(pt1); } } mProfile.push_back(new_pt); @@ -568,7 +670,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 if (params.getHollow() <= 0) { // put center point if not hollow. - mProfile.push_back(LLVector3(0,0,0)); + mProfile.push_back(LLVector4a(0,0,0)); } } else @@ -581,103 +683,6 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 mTotal = mProfile.size(); } -void LLProfile::genNormals(const LLProfileParams& params) -{ - S32 count = mProfile.size(); - - S32 outer_count; - if (mTotalOut) - { - outer_count = mTotalOut; - } - else - { - outer_count = mTotal / 2; - } - - mEdgeNormals.resize(count * 2); - mEdgeCenters.resize(count * 2); - mNormals.resize(count); - - LLVector2 pt0,pt1; - - BOOL hollow = (params.getHollow() > 0); - - S32 i0, i1, i2, i3, i4; - - // Parametrically generate normal - for (i2 = 0; i2 < count; i2++) - { - mNormals[i2].mV[0] = mProfile[i2].mV[0]; - mNormals[i2].mV[1] = mProfile[i2].mV[1]; - if (hollow && (i2 >= outer_count)) - { - mNormals[i2] *= -1.f; - } - if (mNormals[i2].magVec() < 0.001) - { - // Special case for point at center, get adjacent points. - i1 = (i2 - 1) >= 0 ? i2 - 1 : count - 1; - i0 = (i1 - 1) >= 0 ? i1 - 1 : count - 1; - i3 = (i2 + 1) < count ? i2 + 1 : 0; - i4 = (i3 + 1) < count ? i3 + 1 : 0; - - pt0.setVec(mProfile[i1].mV[VX] + mProfile[i1].mV[VX] - mProfile[i0].mV[VX], - mProfile[i1].mV[VY] + mProfile[i1].mV[VY] - mProfile[i0].mV[VY]); - pt1.setVec(mProfile[i3].mV[VX] + mProfile[i3].mV[VX] - mProfile[i4].mV[VX], - mProfile[i3].mV[VY] + mProfile[i3].mV[VY] - mProfile[i4].mV[VY]); - - mNormals[i2] = pt0 + pt1; - mNormals[i2] *= 0.5f; - } - mNormals[i2].normVec(); - } - - S32 num_normal_sets = isConcave() ? 2 : 1; - for (S32 normal_set = 0; normal_set < num_normal_sets; normal_set++) - { - S32 point_num; - for (point_num = 0; point_num < mTotal; point_num++) - { - LLVector3 point_1 = mProfile[point_num]; - point_1.mV[VZ] = 0.f; - - LLVector3 point_2; - - if (isConcave() && normal_set == 0 && point_num == (mTotal - 1) / 2) - { - point_2 = mProfile[mTotal - 1]; - } - else if (isConcave() && normal_set == 1 && point_num == mTotal - 1) - { - point_2 = mProfile[(mTotal - 1) / 2]; - } - else - { - LLVector3 delta_pos; - S32 neighbor_point = (point_num + 1) % mTotal; - while(delta_pos.magVecSquared() < 0.01f * 0.01f) - { - point_2 = mProfile[neighbor_point]; - delta_pos = point_2 - point_1; - neighbor_point = (neighbor_point + 1) % mTotal; - if (neighbor_point == point_num) - { - break; - } - } - } - - point_2.mV[VZ] = 0.f; - LLVector3 face_normal = (point_2 - point_1) % LLVector3::z_axis; - face_normal.normVec(); - mEdgeNormals[normal_set * count + point_num] = face_normal; - mEdgeCenters[normal_set * count + point_num] = lerp(point_1, point_2, 0.5f); - } - } -} - - // Hollow is percent of the original bounding box, not of this particular // profile's geometry. Thus, a swept triangle needs lower hollow values than // a swept square. @@ -693,12 +698,13 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3 Face *face = addFace(mTotalOut, mTotal-mTotalOut,0,LL_FACE_INNER_SIDE, flat); - std::vector pt; + static LLAlignedArray pt; pt.resize(mTotal) ; for (S32 i=mTotalOut;imPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + + LLMatrix3 rot(twist * qang); + + pt->mRot.loadu(rot); t+=step; @@ -1408,51 +1419,55 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en // Run through the non-cut dependent points. while (t < params.getEnd()) { - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; - + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); + t+=step; } // Make one final pass for the end cut. t = params.getEnd(); - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; - + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); + mTotal = mPath.size(); } @@ -1549,7 +1564,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, mDirty = FALSE; S32 np = 2; // hardcode for line - mPath.clear(); + mPath.resize(0); mOpen = TRUE; // Is this 0xf0 mask really necessary? DK 03/02/05 @@ -1575,12 +1590,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, for (S32 i=0;imPath.size() * mProfilep->mProfile.size()) > (1u << 20)) - { - llinfos << "sizeS: " << mPathp->mPath.size() << " sizeT: " << mProfilep->mProfile.size() << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split); BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split); @@ -2163,21 +2176,6 @@ BOOL LLVolume::generate() S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); - //******************************************************************** - //debug info, to be removed - if((U32)(sizeS * sizeT) > (1u << 20)) - { - llinfos << "regenPath: " << (S32)regenPath << " regenProf: " << (S32)regenProf << llendl ; - llinfos << "sizeS: " << sizeS << " sizeT: " << sizeT << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - sNumMeshPoints -= mMesh.size(); mMesh.resize(sizeT * sizeS); sNumMeshPoints += mMesh.size(); @@ -2185,22 +2183,39 @@ BOOL LLVolume::generate() //generate vertex positions // Run along the path. + LLVector4a* dst = mMesh.mArray; + for (S32 s = 0; s < sizeS; ++s) { - LLVector2 scale = mPathp->mPath[s].mScale; - LLQuaternion rot = mPathp->mPath[s].mRot; + F32* scale = mPathp->mPath[s].mScale.getF32ptr(); + + F32 sc [] = + { scale[0], 0, 0, 0, + 0, scale[1], 0, 0, + 0, 0, scale[2], 0, + 0, 0, 0, 1 }; + + LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix); + LLMatrix4 scale_mat(sc); + + scale_mat *= rot; + + LLMatrix4a rot_mat; + rot_mat.loadu(scale_mat); + + LLVector4a* profile = mProfilep->mProfile.mArray; + LLVector4a* end_profile = profile+sizeT; + LLVector4a offset = mPathp->mPath[s].mPos; + + LLVector4a tmp; // Run along the profile. - for (S32 t = 0; t < sizeT; ++t) + while (profile < end_profile) { - S32 m = s*sizeT + t; - Point& pt = mMesh[m]; - - pt.mPos.mV[0] = mProfilep->mProfile[t].mV[0] * scale.mV[0]; - pt.mPos.mV[1] = mProfilep->mProfile[t].mV[1] * scale.mV[1]; - pt.mPos.mV[2] = 0.0f; - pt.mPos = pt.mPos * rot; - pt.mPos += mPathp->mPath[s].mPos; + rot_mat.rotate(*profile++, tmp); + dst->setAdd(tmp,offset); + llassert(less_than_max_mag(*dst)); + ++dst; } } @@ -2210,9 +2225,11 @@ BOOL LLVolume::generate() LLFaceID id = iter->mFaceID; mFaceMask |= id; } - + LL_CHECK_MEMORY return TRUE; } + + LL_CHECK_MEMORY return FALSE; } @@ -2790,14 +2807,16 @@ void LLVolume::createVolumeFaces() } -inline LLVector3 sculpt_rgb_to_vector(U8 r, U8 g, U8 b) +inline LLVector4a sculpt_rgb_to_vector(U8 r, U8 g, U8 b) { // maps RGB values to vector values [0..255] -> [-0.5..0.5] - LLVector3 value; - value.mV[VX] = r / 255.f - 0.5f; - value.mV[VY] = g / 255.f - 0.5f; - value.mV[VZ] = b / 255.f - 0.5f; + LLVector4a value; + LLVector4a sub(0.5f, 0.5f, 0.5f); + value.set(r,g,b); + value.mul(1.f/255.f); + value.sub(sub); + return value; } @@ -2817,21 +2836,21 @@ inline U32 sculpt_st_to_index(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_w } -inline LLVector3 sculpt_index_to_vector(U32 index, const U8* sculpt_data) +inline LLVector4a sculpt_index_to_vector(U32 index, const U8* sculpt_data) { - LLVector3 v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); + LLVector4a v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); return v; } -inline LLVector3 sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_st_to_index(s, t, size_s, size_t, sculpt_width, sculpt_height, sculpt_components); return sculpt_index_to_vector(index, sculpt_data); } -inline LLVector3 sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_xy_to_index(x, y, sculpt_width, sculpt_height, sculpt_components); @@ -2853,15 +2872,26 @@ F32 LLVolume::sculptGetSurfaceArea() for (S32 t = 0; t < sizeT-1; t++) { // get four corners of quad - LLVector3 p1 = mMesh[(s )*sizeT + (t )].mPos; - LLVector3 p2 = mMesh[(s+1)*sizeT + (t )].mPos; - LLVector3 p3 = mMesh[(s )*sizeT + (t+1)].mPos; - LLVector3 p4 = mMesh[(s+1)*sizeT + (t+1)].mPos; + LLVector4a& p1 = mMesh[(s )*sizeT + (t )]; + LLVector4a& p2 = mMesh[(s+1)*sizeT + (t )]; + LLVector4a& p3 = mMesh[(s )*sizeT + (t+1)]; + LLVector4a& p4 = mMesh[(s+1)*sizeT + (t+1)]; // compute the area of the quad by taking the length of the cross product of the two triangles - LLVector3 cross1 = (p1 - p2) % (p1 - p3); - LLVector3 cross2 = (p4 - p2) % (p4 - p3); - area += (cross1.magVec() + cross2.magVec()) / 2.f; + LLVector4a v0,v1,v2,v3; + v0.setSub(p1,p2); + v1.setSub(p1,p3); + v2.setSub(p4,p2); + v3.setSub(p4,p3); + + LLVector4a cross1, cross2; + cross1.setCross3(v0,v1); + cross2.setCross3(v2,v3); + + //LLVector3 cross1 = (p1 - p2) % (p1 - p3); + //LLVector3 cross2 = (p4 - p2) % (p4 - p3); + + area += (cross1.getLength3() + cross2.getLength3()).getF32() / 2.f; } } @@ -2882,17 +2912,19 @@ void LLVolume::sculptGeneratePlaceholder() for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; F32 u = (F32)s/(sizeS-1); F32 v = (F32)t/(sizeT-1); const F32 RADIUS = (F32) 0.3; - - pt.mPos.mV[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[2] = (F32)(cos(F_PI * v) * RADIUS); + + F32* p = pt.getF32ptr(); + + p[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); + p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); + p[2] = (F32)(cos(F_PI * v) * RADIUS); } line += sizeT; @@ -2917,7 +2949,7 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; S32 reversed_t = t; @@ -2974,11 +3006,12 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 } } - pt.mPos = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); + pt = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); if (sculpt_mirror) { - pt.mPos.mV[VX] *= -1.f; + LLVector4a scale(-1.f,1,1,1); + pt.mul(scale); } } @@ -3560,803 +3593,125 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h return true; } -S32 *LLVolume::getTriangleIndices(U32 &num_indices) const +void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) +{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the + //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost + F32 detail[] = {1.f, 1.5f, 2.5f, 4.f}; + for (S32 i = 0; i < 4; i++) + { + S32 count = 0; + S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); + S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); + + count = (profile_points-1)*2*(path_points-1); + count += profile_points*2; + + counts[i] = count; + } +} + + +S32 LLVolume::getNumTriangles(S32* vcount) const { - S32 expected_num_triangle_indices = getNumTriangleIndices(); - if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES) + U32 triangle_count = 0; + U32 vertex_count = 0; + + for (S32 i = 0; i < getNumVolumeFaces(); ++i) { - // we don't allow LLVolumes with this many vertices - llwarns << "Couldn't allocate triangle indices" << llendl; - num_indices = 0; - return NULL; + const LLVolumeFace& face = getVolumeFace(i); + triangle_count += face.mNumIndices/3; + + vertex_count += face.mNumVertices; } - S32* index = new S32[expected_num_triangle_indices]; - S32 count = 0; - // Let's do this totally diffently, as we don't care about faces... - // Counter-clockwise triangles are forward facing... + if (vcount) + { + *vcount = vertex_count; + } + + return triangle_count; +} + - BOOL open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - S32 size_s, size_s_out, size_t; - S32 s, t, i; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); +//----------------------------------------------------------------------------- +// generateSilhouetteVertices() +//----------------------------------------------------------------------------- +void LLVolume::generateSilhouetteVertices(std::vector &vertices, + std::vector &normals, + const LLVector3& obj_cam_vec_in, + const LLMatrix4& mat_in, + const LLMatrix3& norm_mat_in, + S32 face_mask) +{ + LLMatrix4a mat; + mat.loadu(mat_in); + + LLMatrix4a norm_mat; + norm_mat.loadu(norm_mat_in); + + LLVector4a obj_cam_vec; + obj_cam_vec.load3(obj_cam_vec_in.mV); - // NOTE -- if the construction of the triangles below ever changes - // then getNumTriangleIndices() method may also have to be updated. + vertices.clear(); + normals.clear(); - if (open) /* Flawfinder: ignore */ + if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) { - if (hollow) + return; + } + + S32 cur_index = 0; + //for each face + for (face_list_t::iterator iter = mVolumeFaces.begin(); + iter != mVolumeFaces.end(); ++iter) + { + LLVolumeFace& face = *iter; + + if (!(face_mask & (0x1 << cur_index++)) || + face.mNumIndices == 0 || face.mEdge.empty()) { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 + continue; + } - for (t = 0; t < size_t - 1; t++) - { - // The outer face, first cut, and inner face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 + if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } + } + else { - // The other cut face - index[count++] = s + t*size_s; // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = s + (t+1)*size_s; // x,y+1 - - index[count++] = s + (t+1)*size_s; // x,y+1 - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } + //============================================== + //DEBUG draw edge map instead of silhouette edge + //============================================== - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; +#if DEBUG_SILHOUETTE_EDGE_MAP - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } + //for each triangle + U32 count = face.mNumIndices; + for (U32 j = 0; j < count/3; j++) { + //get vertices + S32 v1 = face.mIndices[j*3+0]; + S32 v2 = face.mIndices[j*3+1]; + S32 v3 = face.mIndices[j*3+2]; - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + //get current face center + LLVector3 cCenter = (face.mVertices[v1].getPosition() + + face.mVertices[v2].getPosition() + + face.mVertices[v3].getPosition()) / 3.0f; - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } + //for each edge + for (S32 k = 0; k < 3; k++) { + S32 nIndex = face.mEdge[j*3+k]; + if (nIndex <= -1) { + continue; } - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; + if (nIndex >= (S32) count/3) { + continue; } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Open solid - - for (t = 0; t < size_t - 1; t++) - { - // Outer face + 1 cut face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - - // The other cut face - index[count++] = (size_s - 1) + (t*size_s); // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - index[count++] = 0 + (t*size_s); // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - for (s = 0; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = size_s - 1; - } - - // We've got a top cap - S32 offset = (size_t - 1)*size_s; - for (s = 0; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset + size_s - 1; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - } - else if (hollow) - { - // Closed hollow - // Outer face - - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s_out - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Inner face - // Invert facing from outer face - for (t = 0; t < size_t - 1; t++) - { - for (s = size_s_out; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; - - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; - } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Closed solid. Easy case. - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s - 1; s++) - { - // Should wrap properly, but for now... - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // bottom cap - for (s = 1; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = 0; - } - - // top cap - S32 offset = (size_t - 1)*size_s; - for (s = 1; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - -#ifdef LL_DEBUG - // assert that we computed the correct number of indices - if (count != expected_num_triangle_indices ) - { - llerrs << "bad index count prediciton:" - << " expected=" << expected_num_triangle_indices - << " actual=" << count << llendl; - } -#endif - -#if 0 - // verify that each index does not point beyond the size of the mesh - S32 num_vertices = mMesh.size(); - for (i = 0; i < count; i+=3) - { - llinfos << index[i] << ":" << index[i+1] << ":" << index[i+2] << llendl; - llassert(index[i] < num_vertices); - llassert(index[i+1] < num_vertices); - llassert(index[i+2] < num_vertices); - } -#endif - - num_indices = count; - return index; -} - -void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) -{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the - //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost - F32 detail[] = {1.f, 1.5f, 2.5f, 4.f}; - for (S32 i = 0; i < 4; i++) - { - S32 count = 0; - S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); - S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); - - count = (profile_points-1)*2*(path_points-1); - count += profile_points*2; - - counts[i] = count; - } -} - -S32 LLVolume::getNumTriangleIndices() const -{ - BOOL profile_open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - - S32 size_s, size_s_out, size_t; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); - - S32 count = 0; - if (profile_open) /* Flawfinder: ignore */ - { - if (hollow) - { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - else - { - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - } - else if (hollow) - { - // Closed hollow - // Outer face - count = (size_t - 1) * (size_s_out - 1) * 6; - - // Inner face - count += (size_t - 1) * ((size_s - 1) - size_s_out) * 6; - } - else - { - // Closed solid. Easy case. - count = (size_t - 1) * (size_s - 1) * 6; - } - - if (path_open) - { - S32 cap_triangle_count = size_s - 3; - if ( profile_open - || hollow ) - { - cap_triangle_count = size_s - 2; - } - if ( cap_triangle_count > 0 ) - { - // top and bottom caps - count += cap_triangle_count * 2 * 3; - } - } - return count; -} - - -S32 LLVolume::getNumTriangles(S32* vcount) const -{ - U32 triangle_count = 0; - U32 vertex_count = 0; - - for (S32 i = 0; i < getNumVolumeFaces(); ++i) - { - const LLVolumeFace& face = getVolumeFace(i); - triangle_count += face.mNumIndices/3; - - vertex_count += face.mNumVertices; - } - - - if (vcount) - { - *vcount = vertex_count; - } - - return triangle_count; -} - - -//----------------------------------------------------------------------------- -// generateSilhouetteVertices() -//----------------------------------------------------------------------------- -void LLVolume::generateSilhouetteVertices(std::vector &vertices, - std::vector &normals, - const LLVector3& obj_cam_vec_in, - const LLMatrix4& mat_in, - const LLMatrix3& norm_mat_in, - S32 face_mask) -{ - LLMatrix4a mat; - mat.loadu(mat_in); - - LLMatrix4a norm_mat; - norm_mat.loadu(norm_mat_in); - - LLVector4a obj_cam_vec; - obj_cam_vec.load3(obj_cam_vec_in.mV); - - vertices.clear(); - normals.clear(); - - if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) - { - return; - } - - S32 cur_index = 0; - //for each face - for (face_list_t::iterator iter = mVolumeFaces.begin(); - iter != mVolumeFaces.end(); ++iter) - { - LLVolumeFace& face = *iter; - - if (!(face_mask & (0x1 << cur_index++)) || - face.mNumIndices == 0 || face.mEdge.empty()) - { - continue; - } - - if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { - - } - else { - - //============================================== - //DEBUG draw edge map instead of silhouette edge - //============================================== - -#if DEBUG_SILHOUETTE_EDGE_MAP - - //for each triangle - U32 count = face.mNumIndices; - for (U32 j = 0; j < count/3; j++) { - //get vertices - S32 v1 = face.mIndices[j*3+0]; - S32 v2 = face.mIndices[j*3+1]; - S32 v3 = face.mIndices[j*3+2]; - - //get current face center - LLVector3 cCenter = (face.mVertices[v1].getPosition() + - face.mVertices[v2].getPosition() + - face.mVertices[v3].getPosition()) / 3.0f; - - //for each edge - for (S32 k = 0; k < 3; k++) { - S32 nIndex = face.mEdge[j*3+k]; - if (nIndex <= -1) { - continue; - } - - if (nIndex >= (S32) count/3) { - continue; - } - //get neighbor vertices - v1 = face.mIndices[nIndex*3+0]; - v2 = face.mIndices[nIndex*3+1]; - v3 = face.mIndices[nIndex*3+2]; + //get neighbor vertices + v1 = face.mIndices[nIndex*3+0]; + v2 = face.mIndices[nIndex*3+1]; + v3 = face.mIndices[nIndex*3+2]; //get neighbor face center LLVector3 nCenter = (face.mVertices[v1].getPosition() + @@ -5243,8 +4598,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) freeData(); - LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a)); - resizeVertices(src.mNumVertices); resizeIndices(src.mNumIndices); @@ -5307,7 +4660,7 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately @@ -5331,52 +4684,23 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) delete mOctree; mOctree = NULL; + LL_CHECK_MEMORY BOOL ret = FALSE ; if (mTypeMask & CAP_MASK) { ret = createCap(volume, partial_build); + LL_CHECK_MEMORY } else if ((mTypeMask & END_MASK) || (mTypeMask & SIDE_MASK)) { ret = createSide(volume, partial_build); + LL_CHECK_MEMORY } else { llerrs << "Unknown/uninitialized face type!" << llendl; } - //update the range of the texture coordinates - if(ret) - { - mTexCoordExtents[0].setVec(1.f, 1.f) ; - mTexCoordExtents[1].setVec(0.f, 0.f) ; - - for(U32 i = 0 ; i < mNumVertices ; i++) - { - if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0]) - { - mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ; - } - if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0]) - { - mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ; - } - - if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1]) - { - mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ; - } - if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1]) - { - mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ; - } - } - mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ; - mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ; - mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ; - mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ; - } - return ret ; } @@ -6068,8 +5392,10 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + LL_CHECK_MEMORY + + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6099,9 +5425,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) VertexData baseVert; for(S32 t = 0; t < 4; t++) { - corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); - corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; - corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; + corners[t].getPosition().load4a(mesh[offset + (grid_size*t)].getF32ptr()); + corners[t].mTexCoord.mV[0] = profile[grid_size*t][0]+0.5f; + corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t][1]; } { @@ -6182,6 +5508,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) mCenter->mul(0.5f); } + llassert(less_than_max_mag(mExtents[0])); + llassert(less_than_max_mag(mExtents[1])); + if (!partial_build) { resizeIndices(grid_size*grid_size*6); @@ -6212,6 +5541,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY return TRUE; } @@ -6230,8 +5560,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) S32 num_vertices = 0, num_indices = 0; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; // All types of caps have the same number of vertices and indices num_vertices = profile.size(); @@ -6251,13 +5581,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { resizeVertices(num_vertices); allocateBinormals(num_vertices); - if (!partial_build) { resizeIndices(num_indices); } } + LL_CHECK_MEMORY; + S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6288,35 +5619,68 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector4a* binorm = (LLVector4a*) mBinormals; // Copy the vertices into the array - for (S32 i = 0; i < num_vertices; i++) + + const LLVector4a* src = mesh.mArray+offset; + const LLVector4a* end = src+num_vertices; + + min = *src; + max = min; + + + const LLVector4a* p = profile.mArray; + + if (mTypeMask & TOP_MASK) { - if (mTypeMask & TOP_MASK) - { - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = profile[i].mV[1]+0.5f; - } - else + min_uv.set((*p)[0]+0.5f, + (*p)[1]+0.5f); + + max_uv = min_uv; + + while(src < end) { - // Mirror for underside. - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = 0.5f - profile[i].mV[1]; - } + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = (*p)[1]+0.5f; - pos[i].load3(mesh[i + offset].mPos.mV); + llassert(less_than_max_mag(*src)); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); - if (i == 0) - { - max = pos[i]; - min = max; - min_uv = max_uv = tc[i]; + *pos = *src; + + ++p; + ++tc; + ++src; + ++pos; } - else + } + else + { + + min_uv.set((*p)[0]+0.5f, + 0.5f - (*p)[1]); + max_uv = min_uv; + + while(src < end) { - update_min_max(min,max,pos[i]); - update_min_max(min_uv, max_uv, tc[i]); + // Mirror for underside. + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = 0.5f - (*p)[1]; + + llassert(less_than_max_mag(*src)); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); + + *pos = *src; + + ++p; + ++tc; + ++src; + ++pos; } } + LL_CHECK_MEMORY + mCenter->setAdd(min, max); mCenter->mul(0.5f); @@ -6353,15 +5717,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { - pos[num_vertices] = *mCenter; - tc[num_vertices] = cuv; + *pos++ = *mCenter; + *tc++ = cuv; num_vertices++; } - - for (S32 i = 0; i < num_vertices; i++) + + LL_CHECK_MEMORY + + F32* dst_binorm = (F32*) binorm; + F32* end_binorm = (F32*) (binorm+num_vertices); + + F32* dst_norm = (F32*) norm; + + while (dst_binorm < end_binorm) { - binorm[i].load4a(binormal.getF32ptr()); - norm[i].load4a(normal.getF32ptr()); + binormal.store4a(dst_binorm); + normal.store4a(dst_norm); + + dst_binorm += 4; + dst_norm += 4; } if (partial_build) @@ -6382,33 +5756,38 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; + //p1.mV[VZ] = 0.f; + //p2.mV[VZ] = 0.f; + //pa.mV[VZ] = 0.f; + //pb.mV[VZ] = 0.f; // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6443,10 +5822,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1, pa); + + LLVector4a d2; + d2.setSub(p2, pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6485,33 +5867,33 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); + // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6546,10 +5928,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1,pa); + LLVector4a d2; + d2.setSub(p2,pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6598,6 +5982,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } + + LL_CHECK_MEMORY return TRUE; } @@ -6900,6 +6286,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { + LL_CHECK_MEMORY BOOL flat = mTypeMask & FLAT_MASK; U8 sculpt_type = volume->getParams().getSculptType(); @@ -6910,9 +6297,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) S32 num_vertices, num_indices; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; - const std::vector& path_data = volume->getPath().mPath; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; + const LLAlignedArray& path_data = volume->getPath().mPath; S32 max_s = volume->getProfile().getTotal(); @@ -6933,10 +6320,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + LLVector4a* pos = (LLVector4a*) mPositions; - LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - F32 begin_stex = floorf(profile[mBeginS].mV[2]); + F32 begin_stex = floorf(profile[mBeginS][2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; @@ -6965,11 +6353,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // Get s value for tex-coord. if (!flat) { - ss = profile[mBeginS + s].mV[2]; + ss = profile[mBeginS + s][2]; } else { - ss = profile[mBeginS + s].mV[2] - begin_stex; + ss = profile[mBeginS + s][2] - begin_stex; } } @@ -6989,19 +6377,17 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) i = mBeginS + s + max_s*t; } - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); - norm[cur_vertex].clear(); cur_vertex++; if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - - norm[cur_vertex].clear(); - + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); cur_vertex++; } } @@ -7018,28 +6404,66 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } i = mBeginS + s + max_s*t; - ss = profile[mBeginS + s].mV[2] - begin_stex; - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - norm[cur_vertex].clear(); - + ss = profile[mBeginS + s][2] - begin_stex; + + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); + cur_vertex++; } } + LL_CHECK_MEMORY - //get bounding box for this side - LLVector4a& face_min = mExtents[0]; - LLVector4a& face_max = mExtents[1]; + mCenter->clear(); - face_min = face_max = pos[0]; + LLVector4a* cur_pos = pos; + LLVector4a* end_pos = pos + mNumVertices; + + //get bounding box for this side + LLVector4a face_min; + LLVector4a face_max; + + face_min = face_max = *cur_pos++; + + while (cur_pos < end_pos) + { + update_min_max(face_min, face_max, *cur_pos++); + } + + mExtents[0] = face_min; + mExtents[1] = face_max; + + U32 tc_count = mNumVertices; + if (tc_count%2 == 1) + { //odd number of texture coordinates, duplicate last entry to padded end of array + tc_count++; + mTexCoords[mNumVertices] = mTexCoords[mNumVertices-1]; + } + + LLVector4a* cur_tc = (LLVector4a*) mTexCoords; + LLVector4a* end_tc = (LLVector4a*) (mTexCoords+tc_count); + + LLVector4a tc_min; + LLVector4a tc_max; + + tc_min = tc_max = *cur_tc++; - for (U32 i = 1; i < mNumVertices; ++i) + while (cur_tc < end_tc) { - update_min_max(face_min, face_max, pos[i]); + update_min_max(tc_min, tc_max, *cur_tc++); } + F32* minp = tc_min.getF32ptr(); + F32* maxp = tc_max.getF32ptr(); + + mTexCoordExtents[0].mV[0] = llmin(minp[0], minp[2]); + mTexCoordExtents[0].mV[1] = llmin(minp[1], minp[3]); + mTexCoordExtents[1].mV[0] = llmax(maxp[0], maxp[2]); + mTexCoordExtents[1].mV[1] = llmax(maxp[1], maxp[3]); + mCenter->setAdd(face_min, face_max); mCenter->mul(0.5f); @@ -7104,33 +6528,94 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + //clear normals - for (U32 i = 0; i < mNumVertices; i++) + F32* dst = (F32*) mNormals; + F32* end = (F32*) (mNormals+mNumVertices); + LLVector4a zero = LLVector4a::getZero(); + + while (dst < end) { - mNormals[i].clear(); + zero.store4a(dst); + dst += 4; } + LL_CHECK_MEMORY + //generate normals U32 count = mNumIndices/3; - for (U32 i = 0; i < count; i++) //for each triangle + LLVector4a* norm = mNormals; + + static LLAlignedArray triangle_normals; + triangle_normals.resize(count); + LLVector4a* output = triangle_normals.mArray; + LLVector4a* end_output = output+count; + + U16* idx = mIndices; + + while (output < end_output) { - const U16* idx = &(mIndices[i*3]); - - LLVector4a& v0 = *(pos+idx[0]); - LLVector4a& v1 = *(pos+idx[1]); - LLVector4a& v2 = *(pos+idx[2]); - - LLVector4a& n0 = *(norm+idx[0]); - LLVector4a& n1 = *(norm+idx[1]); - LLVector4a& n2 = *(norm+idx[2]); + LLVector4a b,v1,v2; + b.load4a((F32*) (pos+idx[0])); + v1.load4a((F32*) (pos+idx[1])); + v2.load4a((F32*) (pos+idx[2])); //calculate triangle normal - LLVector4a a, b, c; + LLVector4a a; - a.setSub(v0, v1); - b.setSub(v0, v2); - c.setCross3(a,b); + a.setSub(b, v1); + b.sub(v2); + + + LLQuad& vector1 = *((LLQuad*) &v1); + LLQuad& vector2 = *((LLQuad*) &v2); + + LLQuad& amQ = *((LLQuad*) &a); + LLQuad& bmQ = *((LLQuad*) &b); + + //v1.setCross3(t,v0); + //setCross3(const LLVector4a& a, const LLVector4a& b) + // Vectors are stored in memory in w, z, y, x order from high to low + // Set vector1 = { a[W], a[X], a[Z], a[Y] } + vector1 = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // Set vector2 = { b[W], b[Y], b[X], b[Z] } + vector2 = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] } + vector2 = _mm_mul_ps( vector1, vector2 ); + // vector3 = { a[W], a[Y], a[X], a[Z] } + amQ = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // vector4 = { b[W], b[X], b[Z], b[Y] } + bmQ = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } + vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ )); + + v1.store4a((F32*) output); + + output++; + idx += 3; + } + + idx = mIndices; + + LLVector4a* src = triangle_normals.mArray; + + for (U32 i = 0; i < count; i++) //for each triangle + { + LLVector4a c; + c.load4a((F32*) (src++)); + + LLVector4a* n0p = norm+idx[0]; + LLVector4a* n1p = norm+idx[1]; + LLVector4a* n2p = norm+idx[2]; + + idx += 3; + + LLVector4a n0,n1,n2; + n0.load4a((F32*) n0p); + n1.load4a((F32*) n1p); + n2.load4a((F32*) n2p); n0.add(c); n1.add(c); @@ -7143,8 +6628,14 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) case 1: n1.add(c); break; case 2: n2.add(c); break; }; + + n0.store4a((F32*) n0p); + n1.store4a((F32*) n1p); + n2.store4a((F32*) n2p); } + LL_CHECK_MEMORY + // adjust normals based on wrapping and stitching LLVector4a top; @@ -7276,6 +6767,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } + LL_CHECK_MEMORY + return TRUE; } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 1d3b0fe52f..5e43af92ec 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -37,7 +37,6 @@ class LLPath; template class LLOctreeNode; -class LLVector4a; class LLVolumeFace; class LLVolume; class LLVolumeTriangle; @@ -50,6 +49,8 @@ class LLVolumeTriangle; #include "v3math.h" #include "v3dmath.h" #include "v4math.h" +#include "llvector4a.h" +#include "llmatrix4a.h" #include "llquaternion.h" #include "llstrider.h" #include "v4coloru.h" @@ -194,6 +195,26 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; +template +class LLAlignedArray +{ +public: + T* mArray; + U32 mElementCount; + U32 mCapacity; + + LLAlignedArray(); + ~LLAlignedArray(); + + void push_back(const T& elem); + U32 size() const { return mElementCount; } + void resize(U32 size); + T* append(S32 N); + T& operator[](int idx); + const T& operator[](int idx) const; +}; + + class LLProfileParams { public: @@ -708,16 +729,16 @@ public: LLFaceID mFaceID; }; - std::vector mProfile; - std::vector mNormals; + LLAlignedArray mProfile; + //LLAlignedArray mNormals; std::vector mFaces; - std::vector mEdgeNormals; - std::vector mEdgeCenters; + + //LLAlignedArray mEdgeNormals; + //LLAlignedArray mEdgeCenters; friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile); protected: - void genNormals(const LLProfileParams& params); static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); @@ -741,13 +762,29 @@ protected: class LLPath { public: - struct PathPt + class PathPt { - LLVector3 mPos; - LLVector2 mScale; - LLQuaternion mRot; + public: + LLMatrix4a mRot; + LLVector4a mPos; + + LLVector4a mScale; F32 mTexT; - PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); } + F32 pad[3]; //for alignment + PathPt() + { + mPos.clear(); + mTexT = 0; + mScale.clear(); + mRot.setRows(LLVector4a(1,0,0,0), + LLVector4a(0,1,0,0), + LLVector4a(0,0,1,0)); + + //distinguished data in the pad for debugging + pad[0] = 3.14159f; + pad[1] = -3.14159f; + pad[2] = 0.585f; + } }; public: @@ -779,7 +816,7 @@ public: friend std::ostream& operator<<(std::ostream &s, const LLPath &path); public: - std::vector mPath; + LLAlignedArray mPath; protected: BOOL mOpen; @@ -951,11 +988,7 @@ protected: ~LLVolume(); // use unref public: - struct Point - { - LLVector3 mPos; - }; - + struct FaceParams { LLFaceID mFaceID; @@ -978,8 +1011,8 @@ public: const LLProfile& getProfile() const { return *mProfilep; } LLPath& getPath() const { return *mPathp; } void resizePath(S32 length); - const std::vector& getMesh() const { return mMesh; } - const LLVector3& getMeshPt(const U32 i) const { return mMesh[i].mPos; } + const LLAlignedArray& getMesh() const { return mMesh; } + const LLVector4a& getMeshPt(const U32 i) const { return mMesh[i]; } void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); } @@ -994,10 +1027,7 @@ public: S32 getSculptLevel() const { return mSculptLevel; } void setSculptLevel(S32 level) { mSculptLevel = level; } - S32 *getTriangleIndices(U32 &num_indices) const; - - // returns number of triangle indeces required for path/profile mesh - S32 getNumTriangleIndices() const; + static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts); S32 getNumTriangles(S32* vcount = NULL) const; @@ -1070,7 +1100,8 @@ public: LLVolumeParams mParams; LLPath *mPathp; LLProfile *mProfilep; - std::vector mMesh; + LLAlignedArray mMesh; + BOOL mGenerateSingleFace; typedef std::vector face_list_t; diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp index 77a0cdffce..cd4718381b 100644 --- a/indra/newview/llflexibleobject.cpp +++ b/indra/newview/llflexibleobject.cpp @@ -683,30 +683,36 @@ void LLVolumeImplFlexible::doFlexibleUpdate() LLVector4(z_axis, 0.f), LLVector4(delta_pos, 1.f)); + LL_CHECK_MEMORY for (i=0; i<=num_render_sections; ++i) { new_point = &path->mPath[i]; LLVector3 pos = newSection[i].mPosition * rel_xform; LLQuaternion rot = mSection[i].mAxisRotation * newSection[i].mRotation * delta_rot; - - if (!mUpdated || (new_point->mPos-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f) + + LLVector3 np(new_point->mPos.getF32ptr()); + + if (!mUpdated || (np-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f) { - new_point->mPos = newSection[i].mPosition * rel_xform; + new_point->mPos.load3((newSection[i].mPosition * rel_xform).mV); mUpdated = FALSE; } - new_point->mRot = rot; - new_point->mScale = newSection[i].mScale; + new_point->mRot.loadu(LLMatrix3(rot)); + new_point->mScale.set(newSection[i].mScale.mV[0], newSection[i].mScale.mV[1], 0,1); new_point->mTexT = ((F32)i)/(num_render_sections); } - + LL_CHECK_MEMORY mLastSegmentRotation = parentSegmentRotation; } +static LLFastTimer::DeclareTimer FTM_FLEXI_PREBUILD("Flexi Prebuild"); + void LLVolumeImplFlexible::preRebuild() { if (!mUpdated) { + LLFastTimer t(FTM_FLEXI_PREBUILD); doFlexibleRebuild(); } } diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index b1706d9d35..b5543c4a37 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -739,7 +739,7 @@ class LLVolumeGeometryManager: public LLGeometryManager virtual void rebuildGeom(LLSpatialGroup* group); virtual void rebuildMesh(LLSpatialGroup* group); virtual void getGeometry(LLSpatialGroup* group); - void genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector& faces, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE); + void genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE); void registerFace(LLSpatialGroup* group, LLFace* facep, U32 type); }; diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 7adf18b6d0..597fb03526 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -1051,8 +1051,7 @@ BOOL LLVOVolume::setVolume(const LLVolumeParams ¶ms_in, const S32 detail, bo } } } - - + static LLCachedControl use_transform_feedback(gSavedSettings, "RenderUseTransformFeedback"); bool cache_in_vram = use_transform_feedback && gTransformPositionProgram.mProgramObject && @@ -4242,11 +4241,20 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) mFaceList.clear(); - std::vector fullbright_faces; - std::vector bump_faces; - std::vector simple_faces; + const U32 MAX_FACE_COUNT = 4096; + + static LLFace** fullbright_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** bump_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** simple_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** alpha_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + + U32 fullbright_count = 0; + U32 bump_count = 0; + U32 simple_count = 0; + U32 alpha_count = 0; + - std::vector alpha_faces; + U32 useage = group->mSpatialPartition->mBufferUsage; U32 max_vertices = (gSavedSettings.getS32("RenderMaxVBOSize")*1024)/LLVertexBuffer::calcVertexSize(group->mSpatialPartition->mVertexDataMask); @@ -4257,6 +4265,8 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) bool emissive = false; + + { LLFastTimer t(FTM_REBUILD_VOLUME_FACE_LIST); @@ -4558,7 +4568,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) { if (facep->canRenderAsMask()) { //can be treated as alpha mask - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { @@ -4566,7 +4579,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) { //only treat as alpha in the pipeline if < 100% transparent drawablep->setState(LLDrawable::HAS_ALPHA); } - alpha_faces.push_back(facep); + if (alpha_count < MAX_FACE_COUNT) + { + alpha_faces[alpha_count++] = facep; + } } } else @@ -4581,33 +4597,51 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) { if (te->getBumpmap()) { //needs normal + binormal - bump_faces.push_back(facep); + if (bump_count < MAX_FACE_COUNT) + { + bump_faces[bump_count++] = facep; + } } else if (te->getShiny() || !te->getFullbright()) { //needs normal - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { //doesn't need normal facep->setState(LLFace::FULLBRIGHT); - fullbright_faces.push_back(facep); + if (fullbright_count < MAX_FACE_COUNT) + { + fullbright_faces[fullbright_count++] = facep; + } } } else { if (te->getBumpmap() && LLPipeline::sRenderBump) { //needs normal + binormal - bump_faces.push_back(facep); + if (bump_count < MAX_FACE_COUNT) + { + bump_faces[bump_count++] = facep; + } } else if ((te->getShiny() && LLPipeline::sRenderBump) || !(te->getFullbright() || bake_sunlight)) { //needs normal - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { //doesn't need normal facep->setState(LLFace::FULLBRIGHT); - fullbright_faces.push_back(facep); + if (fullbright_count < MAX_FACE_COUNT) + { + fullbright_faces[fullbright_count++] = facep; + } } } } @@ -4657,17 +4691,17 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) if (batch_textures) { bump_mask |= LLVertexBuffer::MAP_BINORMAL; - genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, FALSE, TRUE); - genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, FALSE, TRUE); - genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, FALSE); - genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, TRUE, TRUE); + genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, simple_count, FALSE, TRUE); + genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, fullbright_count, FALSE, TRUE); + genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, bump_count, FALSE, FALSE); + genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, alpha_count, TRUE, TRUE); } else { - genDrawInfo(group, simple_mask, simple_faces); - genDrawInfo(group, fullbright_mask, fullbright_faces); - genDrawInfo(group, bump_mask, bump_faces, FALSE, TRUE); - genDrawInfo(group, alpha_mask, alpha_faces, TRUE); + genDrawInfo(group, simple_mask, simple_faces, simple_count); + genDrawInfo(group, fullbright_mask, fullbright_faces, fullbright_count); + genDrawInfo(group, bump_mask, bump_faces, bump_count, FALSE, FALSE); + genDrawInfo(group, alpha_mask, alpha_faces, alpha_count, TRUE); } @@ -4699,6 +4733,7 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) } } +static LLFastTimer::DeclareTimer FTM_REBUILD_MESH_FLUSH("Flush Mesh"); void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) { @@ -4708,11 +4743,14 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) LLFastTimer ftm(FTM_REBUILD_VOLUME_VB); LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); //make sure getgeometryvolume shows up in the right place in timers - S32 num_mapped_veretx_buffer = LLVertexBuffer::sMappedCount ; - group->mBuilt = 1.f; - std::set mapped_buffers; + S32 num_mapped_vertex_buffer = LLVertexBuffer::sMappedCount ; + + const U32 MAX_BUFFER_COUNT = 4096; + LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT]; + + U32 buffer_count = 0; for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter) { @@ -4722,7 +4760,7 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) { LLVOVolume* vobj = drawablep->getVOVolume(); vobj->preRebuild(); - + if (drawablep->isState(LLDrawable::ANIMATED_CHILD)) { vobj->updateRelativeXform(true); @@ -4747,9 +4785,9 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) } - if (buff->isLocked()) + if (buff->isLocked() && buffer_count < MAX_BUFFER_COUNT) { - mapped_buffers.insert(buff); + locked_buffer[buffer_count++] = buff; } } } @@ -4765,21 +4803,24 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) } } - for (std::set::iterator iter = mapped_buffers.begin(); iter != mapped_buffers.end(); ++iter) { - (*iter)->flush(); - } - - // don't forget alpha - if(group != NULL && - !group->mVertexBuffer.isNull() && - group->mVertexBuffer->isLocked()) - { - group->mVertexBuffer->flush(); + LLFastTimer t(FTM_REBUILD_MESH_FLUSH); + for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter) + { + (*iter)->flush(); + } + + // don't forget alpha + if(group != NULL && + !group->mVertexBuffer.isNull() && + group->mVertexBuffer->isLocked()) + { + group->mVertexBuffer->flush(); + } } //if not all buffers are unmapped - if(num_mapped_veretx_buffer != LLVertexBuffer::sMappedCount) + if(num_mapped_vertex_buffer != LLVertexBuffer::sMappedCount) { llwarns << "Not all mapped vertex buffers are unmapped!" << llendl ; for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter) @@ -4839,7 +4880,7 @@ static LLFastTimer::DeclareTimer FTM_GEN_DRAW_INFO_RESIZE_VB("Resize VB"); -void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector& faces, BOOL distance_sort, BOOL batch_textures) +void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort, BOOL batch_textures) { LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); @@ -4875,17 +4916,18 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (!distance_sort) { //sort faces by things that break batches - std::sort(faces.begin(), faces.end(), CompareBatchBreakerModified()); + std::sort(faces, faces+face_count, CompareBatchBreakerModified()); } else { //sort faces by distance - std::sort(faces.begin(), faces.end(), LLFace::CompareDistanceGreater()); + std::sort(faces, faces+face_count, LLFace::CompareDistanceGreater()); } } bool hud_group = group->isHUDGroup() ; - std::vector::iterator face_iter = faces.begin(); + LLFace** face_iter = faces; + LLFace** end_faces = faces+face_count; LLSpatialGroup::buffer_map_t buffer_map; @@ -4916,7 +4958,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: bool flexi = false; - while (face_iter != faces.end()) + while (face_iter != end_faces) { //pull off next face LLFace* facep = *face_iter; @@ -4945,10 +4987,13 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: flexi = flexi || facep->getViewerObject()->getVolume()->isUnique(); //sum up vertices needed for this render batch - std::vector::iterator i = face_iter; + LLFace** i = face_iter; ++i; - std::vector texture_list; + const U32 MAX_TEXTURE_COUNT = 32; + LLViewerTexture* texture_list[MAX_TEXTURE_COUNT]; + + U32 texture_count = 0; { LLFastTimer t(FTM_GEN_DRAW_INFO_FACE_SIZE); @@ -4956,12 +5001,15 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: { U8 cur_tex = 0; facep->setTextureIndex(cur_tex); - texture_list.push_back(tex); - + if (texture_count < MAX_TEXTURE_COUNT) + { + texture_list[texture_count++] = tex; + } + if (can_batch_texture(facep)) { //populate texture_list with any textures that can be batched //move i to the next unbatchable face - while (i != faces.end()) + while (i != end_faces) { facep = *i; @@ -4976,7 +5024,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (distance_sort) { //textures might be out of order, see if texture exists in current batch bool found = false; - for (U32 tex_idx = 0; tex_idx < texture_list.size(); ++tex_idx) + for (U32 tex_idx = 0; tex_idx < texture_count; ++tex_idx) { if (facep->getTexture() == texture_list[tex_idx]) { @@ -4988,7 +5036,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (!found) { - cur_tex = texture_list.size(); + cur_tex = texture_count; } } else @@ -5003,7 +5051,10 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: tex = facep->getTexture(); - texture_list.push_back(tex); + if (texture_count < MAX_TEXTURE_COUNT) + { + texture_list[texture_count++] = tex; + } } if (geom_count + facep->getGeomCount() > max_vertices) @@ -5026,7 +5077,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: } else { - while (i != faces.end() && + while (i != end_faces && (LLPipeline::sTextureBindTest || (distance_sort || (*i)->getTexture() == tex))) { facep = *i; -- cgit v1.3 From c04f4f66c813181eb378b00045aec969dc2c4aae Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 12:30:16 -0700 Subject: Moved LLAlignedArray from llmath to llcommon and put template func impls in header to work around Mac 4.3.3 link issue. --- indra/llcommon/CMakeLists.txt | 1 + indra/llmath/llvolume.cpp | 87 ------------------------------------------- indra/llmath/llvolume.h | 21 +---------- 3 files changed, 2 insertions(+), 107 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index e019c17280..0c2ceebb52 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -121,6 +121,7 @@ set(llcommon_HEADER_FILES linden_common.h linked_lists.h llaccountingcost.h + llalignedarray.h llallocator.h llallocator_heap_profile.h llagentconstants.h diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 9fc72fd801..4f3e753276 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -96,93 +96,6 @@ extern BOOL gDebugGL; bool less_than_max_mag(const LLVector4a& vec); -template -LLAlignedArray::LLAlignedArray() -{ - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template -LLAlignedArray::~LLAlignedArray() -{ - ll_aligned_free(mArray); - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template -void LLAlignedArray::push_back(const T& elem) -{ - T* old_buf = NULL; - if (mCapacity <= mElementCount) - { - mCapacity++; - mCapacity *= 2; - T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - } - old_buf = mArray; - mArray = new_buf; - } - - mArray[mElementCount++] = elem; - - //delete old array here to prevent error on a.push_back(a[0]) - ll_aligned_free(old_buf); -} - -template -void LLAlignedArray::resize(U32 size) -{ - if (mCapacity < size) - { - mCapacity = size+mCapacity*2; - T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - ll_aligned_free(mArray); - } - - /*for (U32 i = mElementCount; i < mCapacity; ++i) - { - new(new_buf+i) T(); - }*/ - mArray = new_buf; - } - - mElementCount = size; -} - - -template -T& LLAlignedArray::operator[](int idx) -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template -const T& LLAlignedArray::operator[](int idx) const -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template -T* LLAlignedArray::append(S32 N) -{ - U32 sz = size(); - resize(sz+N); - return &((*this)[sz]); -} - - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 5e43af92ec..1ff53590cf 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -41,6 +41,7 @@ class LLVolumeFace; class LLVolume; class LLVolumeTriangle; +#include "llalignedarray.h" #include "lldarray.h" #include "lluuid.h" #include "v4color.h" @@ -195,26 +196,6 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; -template -class LLAlignedArray -{ -public: - T* mArray; - U32 mElementCount; - U32 mCapacity; - - LLAlignedArray(); - ~LLAlignedArray(); - - void push_back(const T& elem); - U32 size() const { return mElementCount; } - void resize(U32 size); - T* append(S32 N); - T& operator[](int idx); - const T& operator[](int idx) const; -}; - - class LLProfileParams { public: -- cgit v1.3 From e8b8a12b7365c17cf0326be365b78bcb1da1bfac Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Tue, 12 Mar 2013 10:48:05 -0700 Subject: Mods to make compile on gcc 4.6.3 work mo betta --- indra/cmake/00-Common.cmake | 4 ++++ indra/llmath/llvolume.cpp | 5 ----- indra/llmessage/lliosocket.cpp | 1 + indra/llui/llconsole.cpp | 2 -- indra/llui/llkeywords.cpp | 2 +- indra/llui/lllayoutstack.cpp | 2 +- indra/llui/lltextbase.cpp | 3 ++- indra/llui/lltexteditor.cpp | 1 - indra/llui/lltoolbar.cpp | 4 +--- indra/lscript/lscript_execute/lscript_readlso.cpp | 6 +++--- indra/test/io.cpp | 1 + indra/test/llstreamtools_tut.cpp | 8 +++++++- indra/test/lltemplatemessagebuilder_tut.cpp | 2 ++ 13 files changed, 23 insertions(+), 18 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/cmake/00-Common.cmake b/indra/cmake/00-Common.cmake index 6bfd8c14d6..aa1d50b993 100644 --- a/indra/cmake/00-Common.cmake +++ b/indra/cmake/00-Common.cmake @@ -135,6 +135,10 @@ if (LINUX) set(CMAKE_CXX_FLAGS "-Wno-deprecated -Wno-uninitialized -Wno-unused-variable -Wno-unused-function ${CMAKE_CXX_FLAGS}") endif (${CXX_VERSION_NUMBER} LESS 420) + if(${CXX_VERSION_NUMBER} GREATER 459) + set(CMAKE_CXX_FLAGS "-Wno-deprecated -Wno-unused-but-set-variable -Wno-unused-variable ${CMAKE_CXX_FLAGS}") + endif (${CXX_VERSION_NUMBER} GREATER 459) + # gcc 4.3 and above don't like the LL boost and also # cause warnings due to our use of deprecated headers if(${CXX_VERSION_NUMBER} GREATER 429) diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index eff224743b..cb5633c1bb 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5314,12 +5314,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) S32 max_t = volume->getPath().mPath.size(); // S32 i; - S32 num_vertices = 0, num_indices = 0; S32 grid_size = (profile.size()-1)/4; - S32 quad_count = (grid_size * grid_size); - - num_vertices = (grid_size+1)*(grid_size+1); - num_indices = quad_count * 4; LLVector4a& min = mExtents[0]; LLVector4a& max = mExtents[1]; diff --git a/indra/llmessage/lliosocket.cpp b/indra/llmessage/lliosocket.cpp index 0287026659..7713e553ef 100644 --- a/indra/llmessage/lliosocket.cpp +++ b/indra/llmessage/lliosocket.cpp @@ -592,6 +592,7 @@ LLIOPipe::EStatus LLIOServerSocket::process_impl( PUMP_DEBUG; apr_pool_t* new_pool = NULL; apr_status_t status = apr_pool_create(&new_pool, mPool); + (void)status; apr_socket_t* socket = NULL; status = apr_socket_accept( &socket, diff --git a/indra/llui/llconsole.cpp b/indra/llui/llconsole.cpp index 161496b1f5..fdfaf284de 100644 --- a/indra/llui/llconsole.cpp +++ b/indra/llui/llconsole.cpp @@ -243,8 +243,6 @@ void LLConsole::draw() void LLConsole::Paragraph::makeParagraphColorSegments (const LLColor4 &color) { LLSD paragraph_color_segments; - LLColor4 lcolor=color; - paragraph_color_segments[0]["text"] =wstring_to_utf8str(mParagraphText); LLSD color_sd = color.getValue(); paragraph_color_segments[0]["color"]=color_sd; diff --git a/indra/llui/llkeywords.cpp b/indra/llui/llkeywords.cpp index c1cd04186b..537cc82302 100644 --- a/indra/llui/llkeywords.cpp +++ b/indra/llui/llkeywords.cpp @@ -368,7 +368,7 @@ void LLKeywords::findSegments(std::vector* seg_list, const LLW const llwchar* base = wtext.c_str(); const llwchar* cur = base; const llwchar* line = NULL; - + (void)line; while( *cur ) { if( *cur == '\n' || cur == base ) diff --git a/indra/llui/lllayoutstack.cpp b/indra/llui/lllayoutstack.cpp index c93f538570..32383b1f1d 100644 --- a/indra/llui/lllayoutstack.cpp +++ b/indra/llui/lllayoutstack.cpp @@ -481,7 +481,7 @@ void LLLayoutStack::createResizeBar(LLLayoutPanel* panelp) { LLResizeBar::Side side = (mOrientation == HORIZONTAL) ? LLResizeBar::RIGHT : LLResizeBar::BOTTOM; LLRect resize_bar_rect = getRect(); - + (void)resize_bar_rect; LLResizeBar::Params resize_params; resize_params.name("resize"); resize_params.resizing_view(lp); diff --git a/indra/llui/lltextbase.cpp b/indra/llui/lltextbase.cpp index 3815eec447..ec66b6df56 100644 --- a/indra/llui/lltextbase.cpp +++ b/indra/llui/lltextbase.cpp @@ -351,7 +351,6 @@ void LLTextBase::drawSelectionBackground() S32 selection_left = llmin( mSelectionStart, mSelectionEnd ); S32 selection_right = llmax( mSelectionStart, mSelectionEnd ); - LLRect selection_rect = mVisibleTextRect; // Skip through the lines we aren't drawing. LLRect content_display_rect = getVisibleDocumentRect(); @@ -2241,6 +2240,8 @@ S32 LLTextBase::getDocIndexFromLocalCoord( S32 local_x, S32 local_y, BOOL round, // Figure out which line we're nearest to. LLRect visible_region = getVisibleDocumentRect(); LLRect doc_rect = mDocumentView->getRect(); + (void)visible_region; + (void)doc_rect; S32 doc_y = local_y - doc_rect.mBottom; diff --git a/indra/llui/lltexteditor.cpp b/indra/llui/lltexteditor.cpp index 46fbd1e6a0..e4bd51c8ce 100644 --- a/indra/llui/lltexteditor.cpp +++ b/indra/llui/lltexteditor.cpp @@ -2490,7 +2490,6 @@ void LLTextEditor::updateSegments() mKeywords.findSegments(&segment_list, getWText(), mDefaultColor.get(), *this); clearSegments(); - segment_set_t::iterator insert_it = mSegments.begin(); for (segment_vec_t::iterator list_it = segment_list.begin(); list_it != segment_list.end(); ++list_it) { insertSegment(*list_it); diff --git a/indra/llui/lltoolbar.cpp b/indra/llui/lltoolbar.cpp index 63b7e452d2..62b6a0cd2f 100644 --- a/indra/llui/lltoolbar.cpp +++ b/indra/llui/lltoolbar.cpp @@ -653,7 +653,6 @@ void LLToolBar::updateLayoutAsNeeded() S32 max_row_length = 0; S32 max_length; - S32 max_total_girth; S32 cur_start; S32 cur_row ; S32 row_pad_start; @@ -664,7 +663,6 @@ void LLToolBar::updateLayoutAsNeeded() if (orientation == LLLayoutStack::HORIZONTAL) { max_length = getRect().getWidth() - mPadLeft - mPadRight; - max_total_girth = getRect().getHeight() - mPadTop - mPadBottom; row_pad_start = mPadLeft; row_pad_end = mPadRight; cur_row = mPadTop; @@ -673,7 +671,6 @@ void LLToolBar::updateLayoutAsNeeded() else // VERTICAL { max_length = getRect().getHeight() - mPadTop - mPadBottom; - max_total_girth = getRect().getWidth() - mPadLeft - mPadRight; row_pad_start = mPadTop; row_pad_end = mPadBottom; cur_row = mPadLeft; @@ -842,6 +839,7 @@ void LLToolBar::draw() { LLRect caret_rect = caret->getRect(); LLRect toolbar_rect = getRect(); + (void)toolbar_rect; if (getOrientation(mSideType) == LLLayoutStack::HORIZONTAL) { caret->setRect(LLRect(mDragx-caret_rect.getWidth()/2+1, diff --git a/indra/lscript/lscript_execute/lscript_readlso.cpp b/indra/lscript/lscript_execute/lscript_readlso.cpp index 35caa41ae1..3cdb41ac17 100644 --- a/indra/lscript/lscript_execute/lscript_readlso.cpp +++ b/indra/lscript/lscript_execute/lscript_readlso.cpp @@ -145,7 +145,7 @@ void LLScriptLSOParse::printGlobals(LLFILE *fp) // get offset to skip past name varoffset = global_v_offset; offset = bytestream2integer(mRawData, global_v_offset); - + (void)offset; //hush little compiler // get typeexport type = *(mRawData + global_v_offset++); @@ -262,8 +262,6 @@ void LLScriptLSOParse::printGlobalFunctions(LLFILE *fp) fprintf(fp, "[Function #%d] [0x%X] %s\n", function_number, orig_function_offset, name); fprintf(fp, "\tReturn Type: %s\n", LSCRIPTTypeNames[type]); type = *(mRawData + function_offset++); - S32 params; - params = 0; S32 pcount = 0; while (type) { @@ -350,6 +348,7 @@ void LLScriptLSOParse::printStates(LLFILE *fp) S32 dummy; opcode_end = worst_case_opcode_end; + (void)opcode_end; for (k = LSTT_STATE_BEGIN; k < LSTT_STATE_END; k++) { @@ -357,6 +356,7 @@ void LLScriptLSOParse::printStates(LLFILE *fp) { temp_end = bytestream2integer(mRawData, read_ahead); dummy = bytestream2integer(mRawData, read_ahead); + (void)dummy; if ( (temp_end < opcode_end) &&(temp_end > event_offset)) { diff --git a/indra/test/io.cpp b/indra/test/io.cpp index ce747f667d..7f26ac6724 100644 --- a/indra/test/io.cpp +++ b/indra/test/io.cpp @@ -1141,6 +1141,7 @@ namespace tut ensure("Connected to server", connected); lldebugs << "connected" << llendl; F32 elapsed = pump_loop(mPump,0.1f); + (void)elapsed; count = mPump->runningChains(); ensure_equals("server chain onboard", count, 2); lldebugs << "** Client is connected." << llendl; diff --git a/indra/test/llstreamtools_tut.cpp b/indra/test/llstreamtools_tut.cpp index a93f2e8f65..68e56b5ee2 100644 --- a/indra/test/llstreamtools_tut.cpp +++ b/indra/test/llstreamtools_tut.cpp @@ -386,15 +386,17 @@ namespace tut std::string actual_result; std::istringstream is; bool ret; - is.clear(); is.str(str = " First Second \t \r \n Third Fourth-ShouldThisBePartOfFourth Fifth\n"); actual_result = ""; ret = get_word(actual_result, is); // First + (void)ret; actual_result = ""; ret = get_word(actual_result, is); // Second + (void)ret; actual_result = ""; ret = get_word(actual_result, is); // Third + (void)ret; // the current implementation of get_word seems inconsistent with // skip_to_next_word. skip_to_next_word treats any character other @@ -486,6 +488,7 @@ namespace tut is.str(str = "First Second \t \r\n Third Fourth-ShouldThisBePartOfFourth IsThisFifth\n"); actual_result = ""; ret = get_line(actual_result, is); + (void)ret; expected_result = "First Second \t \r\n"; ensure_equals("get_line: 1", actual_result, expected_result); @@ -551,6 +554,7 @@ namespace tut is.str(str = "Should not skip lone \r.\r\n"); actual_result = ""; ret = get_line(actual_result, is); + (void)ret; expected_result = "Should not skip lone \r.\r\n"; ensure_equals("get_line: carriage return skipped even though not followed by newline", actual_result, expected_result); } @@ -569,6 +573,7 @@ namespace tut is.str(str = "\n"); actual_result = ""; ret = get_line(actual_result, is); + (void)ret; expected_result = "\n"; ensure_equals("get_line: Just newline", actual_result, expected_result); } @@ -588,6 +593,7 @@ namespace tut is.str(str = "First Line.\nSecond Line.\n"); actual_result = ""; ret = get_line(actual_result, is, 255); + (void)ret; expected_result = "First Line.\n"; ensure_equals("get_line: Basic Operation", actual_result, expected_result); diff --git a/indra/test/lltemplatemessagebuilder_tut.cpp b/indra/test/lltemplatemessagebuilder_tut.cpp index 6e1c82bb24..0aad3cbc15 100644 --- a/indra/test/lltemplatemessagebuilder_tut.cpp +++ b/indra/test/lltemplatemessagebuilder_tut.cpp @@ -958,11 +958,13 @@ namespace tut reader->validateMessage(buffer, builtSize, LLHost()); reader->readMessage(buffer, LLHost()); reader->getU32(_PREHASH_Test0, _PREHASH_Test0, outValue); + (void)outValue; char outBuffer[bufferSize]; memset(buffer, 0xcc, bufferSize); reader->getString(_PREHASH_Test1, _PREHASH_Test0, bufferSize, outBuffer); outValue2 = reader->getNumberOfBlocks(_PREHASH_Test1); + (void)outValue2; ensure_equals("Ensure present value ", outValue, inValue); ensure_equals("Ensure unchanged buffer ", strlen(outBuffer), 0); delete reader; -- cgit v1.3 From 5d2fea6262d91eb8d3c06d97a160ca9373b96889 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham Linden)" Date: Wed, 13 Mar 2013 10:42:40 -0700 Subject: Move fast memcpy to llcommon and use it in llalignedarray pushback on all platforms. Code Review: DaveP --- indra/llcommon/llalignedarray.h | 16 +----- indra/llcommon/llmemory.h | 116 +++++++++++++++++++++++++++++++++++++--- indra/llmath/llsimdmath.h | 28 ---------- indra/llmath/llvector4a.cpp | 50 +---------------- indra/llmath/llvector4a.h | 4 -- indra/llmath/llvolume.cpp | 11 ++-- 6 files changed, 120 insertions(+), 105 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llcommon/llalignedarray.h b/indra/llcommon/llalignedarray.h index 5e04e8050f..ed8fd31205 100644 --- a/indra/llcommon/llalignedarray.h +++ b/indra/llcommon/llalignedarray.h @@ -29,10 +29,6 @@ #include "llmemory.h" -#if LL_WINDOWS -#include "llvector4a.h" // for 16b fast copy -#endif - template class LLAlignedArray { @@ -81,11 +77,7 @@ void LLAlignedArray::push_back(const T& elem) T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); if (mArray) { -#if LL_WINDOWS - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#else - memcpy((F32*)new_buf, (F32*)mArray, sizeof(T)*mElementCount); -#endif + ll_memcpy_nonaliased_aligned_16((char*)new_buf, (char*)mArray, sizeof(T)*mElementCount); } old_buf = mArray; mArray = new_buf; @@ -106,11 +98,7 @@ void LLAlignedArray::resize(U32 size) T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; if (mArray) { -#if LL_WINDOWS - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#else - memcpy((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#endif + ll_memcpy_nonaliased_aligned_16((char*) new_buf, (char*) mArray, sizeof(T)*mElementCount); ll_aligned_free(mArray); } diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 46cabfadcd..61e30f11cc 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -36,6 +36,44 @@ class LLMutex ; #define LL_CHECK_MEMORY #endif +LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); + +#ifdef SHOW_ASSERT +#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) +#else +#define ll_assert_aligned(ptr,alignment) +#endif + +#include + +template T* LL_NEXT_ALIGNED_ADDRESS(T* address) +{ + return reinterpret_cast( + (reinterpret_cast(address) + 0xF) & ~0xF); +} + +template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) +{ + return reinterpret_cast( + (reinterpret_cast(address) + 0x3F) & ~0x3F); +} + +#if LL_LINUX || LL_DARWIN + +#define LL_ALIGN_PREFIX(x) +#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) + +#elif LL_WINDOWS + +#define LL_ALIGN_PREFIX(x) __declspec(align(x)) +#define LL_ALIGN_POSTFIX(x) + +#else +#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" +#endif + +#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) + inline void* ll_aligned_malloc( size_t size, int align ) { #if defined(LL_WINDOWS) @@ -144,6 +182,78 @@ inline void ll_aligned_free_32(void *p) #endif } + +// Copy words 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP. +// Source and dest must be 16-byte aligned and size must be multiple of 16. +// +inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes) +{ + assert(src != NULL); + assert(dst != NULL); + assert(bytes > 0); + assert((bytes % sizeof(F32))== 0); + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src)); + assert(bytes%16==0); + + char* end = dst + bytes; + + if (bytes > 64) + { + + // Find start of 64b aligned area within block + // + void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); + + //at least 64 bytes before the end of the destination, switch to 16 byte copies + void* end_64 = end-64; + + // Prefetch the head of the 64b area now + // + _mm_prefetch((char*)begin_64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); + + // Copy 16b chunks until we're 64b aligned + // + while (dst < begin_64) + { + + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + dst += 16; + src += 16; + } + + // Copy 64b chunks up to your tail + // + // might be good to shmoo the 512b prefetch offset + // (characterize performance for various values) + // + while (dst < end_64) + { + _mm_prefetch((char*)src + 512, _MM_HINT_NTA); + _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16))); + _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32))); + _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48))); + dst += 64; + src += 64; + } + } + + // Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies) + // + while (dst < end) + { + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + dst += 16; + src += 16; + } +} + #ifndef __DEBUG_PRIVATE_MEM__ #define __DEBUG_PRIVATE_MEM__ 0 #endif @@ -552,13 +662,7 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr) // LLSingleton moved to llsingleton.h -LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); -#ifdef SHOW_ASSERT -#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) -#else -#define ll_assert_aligned(ptr,alignment) -#endif #endif diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..cebd2ace7d 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include #endif -template T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0xF) & ~0xF); -} - -template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include #include diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..570fa41a43 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 1a478bc8de..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -93,11 +93,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary -// This assert is causing spurious referenced before set warnings on GCC 4.3.4 -// -#if !LL_LINUX ll_assert_aligned(this,16); -#endif } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 4f3e753276..e7a0bc7df7 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4729,10 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + // Only swap data if we've actually optimized the mesh + // + if (new_face.mNumVertices < mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; -- cgit v1.3 From bba84a3fa9a1af87f6a8080f9093f9277feb1292 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham Linden)" Date: Wed, 13 Mar 2013 13:38:30 -0700 Subject: Cleanup per code review of prev change with DaveP --- indra/llcommon/llmemory.h | 12 ++++++------ indra/llmath/llvolume.cpp | 11 +++++++---- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 61e30f11cc..d0e4bc9e25 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -188,14 +188,14 @@ inline void ll_aligned_free_32(void *p) // inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); + llassert(src != NULL); + llassert(dst != NULL); + llassert(bytes >= 16); + llassert((bytes % sizeof(F32))== 0); + llassert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src)); + llassert(bytes%16==0); ll_assert_aligned(src,16); ll_assert_aligned(dst,16); - assert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src)); - assert(bytes%16==0); char* end = dst + bytes; diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cb5633c1bb..edd16b5688 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4729,11 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - if (new_face.mNumVertices) + // disallow data amplification + // + if (new_face.mNumVertices <= mNumVertices) { - llassert(new_face.mNumIndices == mNumIndices); - swapData(new_face); - } + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; @@ -6731,3 +6733,4 @@ void calc_binormal_from_triangle(LLVector4a& binormal, binormal.set( 0, 1 , 0 ); } } + -- cgit v1.3 From dbfcd6c9c5709b74365c2538ba312685b09d22bf Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 7 May 2013 17:20:33 -0500 Subject: Optimization -- don't draw glow in alpha pool unless needed --- indra/llmath/llvolume.cpp | 6 +++- indra/llprimitive/llmodel.cpp | 37 +++++++++++++++++++ indra/llprimitive/llmodel.h | 4 ++- indra/newview/lldrawpoolalpha.cpp | 3 +- indra/newview/llspatialpartition.cpp | 3 +- indra/newview/llspatialpartition.h | 5 +-- indra/newview/llviewerpartsim.cpp | 2 +- indra/newview/llviewerpartsource.cpp | 2 +- indra/newview/llvopartgroup.cpp | 70 ++++++++++++++++++++++++------------ 9 files changed, 101 insertions(+), 31 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 602f2c29e5..7751ef87ee 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4520,7 +4520,11 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size); - LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + + if (src.mNormals) + { + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + } if(src.mTexCoords) { diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index ef6eb75a6b..e236f98fe6 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -1122,6 +1122,43 @@ void LLModel::getNormalizedScaleTranslation(LLVector3& scale_out, LLVector3& tra translation_out = mNormalizedTranslation; } +LLVector3 LLModel::getTransformedCenter(const LLMatrix4& mat) +{ + LLVector3 ret; + + if (!mVolumeFaces.empty()) + { + LLMatrix4a m; + m.loadu(mat); + + LLVector4a minv,maxv; + + LLVector4a t; + m.affineTransform(mVolumeFaces[0].mPositions[0], t); + minv = maxv = t; + + for (S32 i = 0; i < mVolumeFaces.size(); ++i) + { + LLVolumeFace& face = mVolumeFaces[i]; + + for (U32 j = 0; j < face.mNumVertices; ++j) + { + m.affineTransform(face.mPositions[j],t); + update_min_max(minv, maxv, t); + } + } + + minv.add(maxv); + minv.mul(0.5f); + + ret.set(minv.getF32ptr()); + } + + return ret; +} + + + void LLModel::setNumVolumeFaces(S32 count) { mVolumeFaces.resize(count); diff --git a/indra/llprimitive/llmodel.h b/indra/llprimitive/llmodel.h index 1cf528fd9f..aaafc55258 100644 --- a/indra/llprimitive/llmodel.h +++ b/indra/llprimitive/llmodel.h @@ -173,13 +173,15 @@ public: void optimizeVolumeFaces(); void offsetMesh( const LLVector3& pivotPoint ); void getNormalizedScaleTranslation(LLVector3& scale_out, LLVector3& translation_out); - + LLVector3 getTransformedCenter(const LLMatrix4& mat); + //reorder face list based on mMaterialList in this and reference so //order matches that of reference (material ordering touchup) bool matchMaterialOrder(LLModel* ref, int& refFaceCnt, int& modelFaceCnt ); bool isMaterialListSubset( LLModel* ref ); bool needToAddFaces( LLModel* ref, int& refFaceCnt, int& modelFaceCnt ); + std::vector mMaterialList; //data used for skin weights diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp index 6fa16825df..331744acb7 100644 --- a/indra/newview/lldrawpoolalpha.cpp +++ b/indra/newview/lldrawpoolalpha.cpp @@ -515,7 +515,8 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask) // If this alpha mesh has glow, then draw it a second time to add the destination-alpha (=glow). Interleaving these state-changing calls could be expensive, but glow must be drawn Z-sorted with alpha. if (current_shader && draw_glow_for_this_partition && - params.mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_EMISSIVE)) + params.mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_EMISSIVE) && + (!params.mParticle || params.mHasGlow)) { static LLFastTimer::DeclareTimer FTM_RENDER_ALPHA_GLOW("Alpha Glow"); LLFastTimer t(FTM_RENDER_ALPHA_GLOW); diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 1ec56eb5f8..2a1d0d223c 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -4656,7 +4656,8 @@ LLDrawInfo::LLDrawInfo(U16 start, U16 end, U32 count, U32 offset, mDistance(0.f), mDrawMode(LLRender::TRIANGLES), mBlendFuncSrc(LLRender::BF_SOURCE_ALPHA), - mBlendFuncDst(LLRender::BF_ONE_MINUS_SOURCE_ALPHA) + mBlendFuncDst(LLRender::BF_ONE_MINUS_SOURCE_ALPHA), + mHasGlow(FALSE) { mVertexBuffer->validateRange(mStart, mEnd, mCount, mOffset); diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index 08e77855c4..e9be93ce98 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -70,12 +70,12 @@ protected: public: void* operator new(size_t size) { - return ll_aligned_malloc_16(size); + return ll_aligned_malloc(size,64); } void operator delete(void* ptr) { - ll_aligned_free_16(ptr); + ll_aligned_free(ptr); } @@ -121,6 +121,7 @@ public: U32 mDrawMode; U32 mBlendFuncSrc; U32 mBlendFuncDst; + BOOL mHasGlow; struct CompareTexture { diff --git a/indra/newview/llviewerpartsim.cpp b/indra/newview/llviewerpartsim.cpp index 21f1d2619c..96cd43a8ab 100644 --- a/indra/newview/llviewerpartsim.cpp +++ b/indra/newview/llviewerpartsim.cpp @@ -387,7 +387,7 @@ void LLViewerPartGroup::updateParticles(const F32 lastdt) } // Do glow interpolation - part->mGlow.mV[3] = (U8) (lerp(part->mStartGlow, part->mEndGlow, frac)*255.f); + part->mGlow.mV[3] = (U8) llround(lerp(part->mStartGlow, part->mEndGlow, frac)*255.f); // Set the last update time to now. part->mLastUpdateTime = cur_time; diff --git a/indra/newview/llviewerpartsource.cpp b/indra/newview/llviewerpartsource.cpp index 8c49ce646d..b6bbd6140d 100644 --- a/indra/newview/llviewerpartsource.cpp +++ b/indra/newview/llviewerpartsource.cpp @@ -313,7 +313,7 @@ void LLViewerPartSourceScript::update(const F32 dt) part->mStartGlow = mPartSysData.mPartData.mStartGlow; part->mEndGlow = mPartSysData.mPartData.mEndGlow; - part->mGlow = LLColor4U(0, 0, 0, (U8) (part->mStartGlow*255.f)); + part->mGlow = LLColor4U(0, 0, 0, (U8) llround(part->mStartGlow*255.f)); if (mPartSysData.mPattern & LLPartSysData::LL_PART_SRC_PATTERN_DROP) { diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp index 53d67347d1..e5e627c1ea 100644 --- a/indra/newview/llvopartgroup.cpp +++ b/indra/newview/llvopartgroup.cpp @@ -273,6 +273,10 @@ void LLVOPartGroup::getBlendFunc(S32 idx, U32& src, U32& dst) src = part->mBlendFuncSource; dst = part->mBlendFuncDest; } + else + { + llerrs << "WTF?" << llendl; + } } LLVector3 LLVOPartGroup::getCameraPosition() const @@ -670,7 +674,7 @@ void LLVOPartGroup::getGeometry(S32 idx, } else { - pglow = LLColor4U(0, 0, 0, (U8) (255.f*part.mStartGlow)); + pglow = LLColor4U(0, 0, 0, (U8) llround(255.f*part.mStartGlow)); pcolor = part.mStartColor; } } @@ -685,10 +689,13 @@ void LLVOPartGroup::getGeometry(S32 idx, *colorsp++ = color; *colorsp++ = color; - *emissivep++ = pglow; - *emissivep++ = pglow; - *emissivep++ = part.mGlow; - *emissivep++ = part.mGlow; + //if (pglow.mV[3] || part.mGlow.mV[3]) + { //only write glow if it is not zero + *emissivep++ = pglow; + *emissivep++ = pglow; + *emissivep++ = part.mGlow; + *emissivep++ = part.mGlow; + } if (!(part.mFlags & LLPartData::LL_PART_EMISSIVE_MASK)) @@ -873,8 +880,17 @@ void LLParticlePartition::getGeometry(LLSpatialGroup* group) LLStrider cur_col = colorsp + geom_idx; LLStrider cur_glow = emissivep + geom_idx; + LLColor4U* start_glow = cur_glow.get(); + object->getGeometry(facep->getTEOffset(), cur_vert, cur_norm, cur_tc, cur_col, cur_glow, cur_idx); + BOOL has_glow = FALSE; + + if (cur_glow.get() != start_glow) + { + has_glow = TRUE; + } + llassert(facep->getGeomCount() == 4); llassert(facep->getIndicesCount() == 6); @@ -894,26 +910,32 @@ void LLParticlePartition::getGeometry(LLSpatialGroup* group) object->getBlendFunc(facep->getTEOffset(), bf_src, bf_dst); - if (idx >= 0 && - draw_vec[idx]->mTexture == facep->getTexture() && - draw_vec[idx]->mFullbright == fullbright && - draw_vec[idx]->mBlendFuncDst == bf_dst && - draw_vec[idx]->mBlendFuncSrc == bf_src) + + if (idx >= 0) { - if (draw_vec[idx]->mEnd == facep->getGeomIndex()-1) - { - batched = true; - draw_vec[idx]->mCount += facep->getIndicesCount(); - draw_vec[idx]->mEnd += facep->getGeomCount(); - draw_vec[idx]->mVSize = llmax(draw_vec[idx]->mVSize, vsize); - } - else if (draw_vec[idx]->mStart == facep->getGeomIndex()+facep->getGeomCount()+1) + LLDrawInfo* info = draw_vec[idx]; + + if (info->mTexture == facep->getTexture() && + info->mHasGlow == has_glow && + info->mFullbright == fullbright && + info->mBlendFuncDst == bf_dst && + info->mBlendFuncSrc == bf_src) { - batched = true; - draw_vec[idx]->mCount += facep->getIndicesCount(); - draw_vec[idx]->mStart -= facep->getGeomCount(); - draw_vec[idx]->mOffset = facep->getIndicesStart(); - draw_vec[idx]->mVSize = llmax(draw_vec[idx]->mVSize, vsize); + if (draw_vec[idx]->mEnd == facep->getGeomIndex()-1) + { + batched = true; + info->mCount += facep->getIndicesCount(); + info->mEnd += facep->getGeomCount(); + info->mVSize = llmax(draw_vec[idx]->mVSize, vsize); + } + else if (draw_vec[idx]->mStart == facep->getGeomIndex()+facep->getGeomCount()+1) + { + batched = true; + info->mCount += facep->getIndicesCount(); + info->mStart -= facep->getGeomCount(); + info->mOffset = facep->getIndicesStart(); + info->mVSize = llmax(draw_vec[idx]->mVSize, vsize); + } } } @@ -932,6 +954,8 @@ void LLParticlePartition::getGeometry(LLSpatialGroup* group) info->mVSize = vsize; info->mBlendFuncDst = bf_dst; info->mBlendFuncSrc = bf_src; + info->mHasGlow = has_glow; + info->mParticle = TRUE; draw_vec.push_back(info); //for alpha sorting facep->setDrawInfo(info); -- cgit v1.3 From ec5bd94d59247e600f8f8702a543f99d40930d20 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 15 May 2013 11:00:00 -0700 Subject: BUG-2581 : [simon-ll-viewer-cat] Path cut, hollow and slice break planar texture mapping on prims. Credits to DaveP --- indra/llmath/llvolume.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 7751ef87ee..317d15f84e 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5604,16 +5604,16 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector4a binormal; calc_binormal_from_triangle(binormal, *mCenter, cuv, - pos[0], tc[0], - pos[1], tc[1]); + mPositions[0], mTexCoords[0], + mPositions[1], mTexCoords[1]); binormal.normalize3fast(); LLVector4a normal; LLVector4a d0, d1; - d0.setSub(*mCenter, pos[0]); - d1.setSub(*mCenter, pos[1]); + d0.setSub(*mCenter, mPositions[0]); + d1.setSub(*mCenter, mPositions[1]); if (mTypeMask & TOP_MASK) { -- cgit v1.3 From 4e46ce5520cb3d8793921c2a0f9e67f16ad776ba Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 20 Jun 2013 17:17:28 -0500 Subject: MATBUG-204 Fix for crash in genVolumeBBoxes --- indra/llmath/llvolume.cpp | 13 ----- indra/newview/llface.cpp | 117 ++++++++++++++----------------------------- indra/newview/llface.h | 2 +- indra/newview/llvovolume.cpp | 2 +- 4 files changed, 39 insertions(+), 95 deletions(-) (limited to 'indra/llmath/llvolume.cpp') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index c4e1f0c84c..14cebfe5aa 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -7277,9 +7277,6 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe const LLVector4a& t = tan1[a]; - llassert(tan1[a].getLength3().getF32() >= 0.f); - llassert(tan2[a].getLength3().getF32() >= 0.f); - LLVector4a ncrosst; ncrosst.setCross3(n,t); @@ -7299,16 +7296,6 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe tsubn.getF32ptr()[3] = handedness; tangent[a] = tsubn; - - /* - These are going off on invalid input and hindering other debugging. - llassert(llfinite(tangent[a].getF32ptr()[0])); - llassert(llfinite(tangent[a].getF32ptr()[1])); - llassert(llfinite(tangent[a].getF32ptr()[2])); - - llassert(!llisnan(tangent[a].getF32ptr()[0])); - llassert(!llisnan(tangent[a].getF32ptr()[1])); - llassert(!llisnan(tangent[a].getF32ptr()[2]));*/ } else { //degenerate, make up a value diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index f021f4ed0f..e63e4285e7 100755 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -768,7 +768,7 @@ bool less_than_max_mag(const LLVector4a& vec) } BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat_vert_in, const LLMatrix3& mat_normal_in, BOOL global_volume) + const LLMatrix4& mat_vert_in, BOOL global_volume) { //get bounding box if (mDrawablep->isState(LLDrawable::REBUILD_VOLUME | LLDrawable::REBUILD_POSITION | LLDrawable::REBUILD_RIGGED)) @@ -777,10 +777,6 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, LLMatrix4a mat_vert; mat_vert.loadu(mat_vert_in); - LLMatrix4a mat_normal; - mat_normal.loadu(mat_normal_in); - - //VECTORIZE THIS LLVector4a min,max; if (f >= volume.getNumVolumeFaces()) @@ -797,104 +793,65 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, llassert(less_than_max_mag(max)); //min, max are in volume space, convert to drawable render space - LLVector4a center; - LLVector4a t; - t.setAdd(min, max); - t.mul(0.5f); - mat_vert.affineTransform(t, center); - LLVector4a size; - size.setSub(max, min); - size.mul(0.5f); - llassert(less_than_max_mag(min)); - llassert(less_than_max_mag(max)); + //get 8 corners of bounding box + LLVector4Logical mask[6]; - if (!global_volume) + for (U32 i = 0; i < 6; ++i) { - //VECTORIZE THIS - LLVector4a scale; - scale.load3(mDrawablep->getVObj()->getScale().mV); - size.mul(scale); + mask[i].clear(); } - // Catch potential badness from normalization before it happens - // - llassert(mat_normal.mMatrix[0].isFinite3() && (mat_normal.mMatrix[0].dot3(mat_normal.mMatrix[0]).getF32() > F_APPROXIMATELY_ZERO)); - llassert(mat_normal.mMatrix[1].isFinite3() && (mat_normal.mMatrix[1].dot3(mat_normal.mMatrix[1]).getF32() > F_APPROXIMATELY_ZERO)); - llassert(mat_normal.mMatrix[2].isFinite3() && (mat_normal.mMatrix[2].dot3(mat_normal.mMatrix[2]).getF32() > F_APPROXIMATELY_ZERO)); - - mat_normal.mMatrix[0].normalize3fast(); - mat_normal.mMatrix[1].normalize3fast(); - mat_normal.mMatrix[2].normalize3fast(); + mask[0].setElement<2>(); //001 + mask[1].setElement<1>(); //010 + mask[2].setElement<1>(); //011 + mask[2].setElement<2>(); + mask[3].setElement<0>(); //100 + mask[4].setElement<0>(); //101 + mask[4].setElement<2>(); + mask[5].setElement<0>(); //110 + mask[5].setElement<1>(); - LLVector4a v[4]; + LLVector4a v[8]; - //get 4 corners of bounding box - mat_normal.rotate(size,v[0]); + v[6] = min; + v[7] = max; - //VECTORIZE THIS - LLVector4a scale; - - scale.set(-1.f, -1.f, 1.f); - scale.mul(size); - mat_normal.rotate(scale, v[1]); - - scale.set(1.f, -1.f, -1.f); - scale.mul(size); - mat_normal.rotate(scale, v[2]); - - scale.set(-1.f, 1.f, -1.f); - scale.mul(size); - mat_normal.rotate(scale, v[3]); - - LLVector4a& newMin = mExtents[0]; - LLVector4a& newMax = mExtents[1]; - - newMin = newMax = center; - - llassert(less_than_max_mag(center)); - - for (U32 i = 0; i < 4; i++) + for (U32 i = 0; i < 6; ++i) { - LLVector4a delta; - delta.setAbs(v[i]); - LLVector4a min; - min.setSub(center, delta); - LLVector4a max; - max.setAdd(center, delta); + v[i].setSelectWithMask(mask[i], min, max); + } - newMin.setMin(newMin,min); - newMax.setMax(newMax,max); + LLVector4a tv[8]; - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); + //transform bounding box into drawable space + for (U32 i = 0; i < 8; ++i) + { + mat_vert.affineTransform(v[i], tv[i]); } + + //find bounding box + LLVector4a& newMin = mExtents[0]; + LLVector4a& newMax = mExtents[1]; - if (!mDrawablep->isActive()) + newMin = newMax = tv[0]; + + for (U32 i = 1; i < 8; ++i) { - LLVector4a offset; - offset.load3(mDrawablep->getRegion()->getOriginAgent().mV); - newMin.add(offset); - newMax.add(offset); - - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); + newMin.setMin(newMin, tv[i]); + newMax.setMax(newMax, tv[i]); } - t.setAdd(newMin, newMax); + LLVector4a t; + t.setAdd(newMin,newMax); t.mul(0.5f); - llassert(less_than_max_mag(t)); - - //VECTORIZE THIS mCenterLocal.set(t.getF32ptr()); - - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); t.setSub(newMax,newMin); mBoundingSphereRadius = t.getLength3().getF32()*0.5f; + updateCenterAgent(); } diff --git a/indra/newview/llface.h b/indra/newview/llface.h index 0687544d53..763634a3ab 100755 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -195,7 +195,7 @@ public: void setSize(S32 numVertices, S32 num_indices = 0, bool align = false); BOOL genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat, const LLMatrix3& inv_trans_mat, BOOL global_volume = FALSE); + const LLMatrix4& mat, BOOL global_volume = FALSE); void init(LLDrawable* drawablep, LLViewerObject* objp); void destroy(); diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index eb9e1bb2d0..77a7d22ea3 100755 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -1458,7 +1458,7 @@ BOOL LLVOVolume::genBBoxes(BOOL force_global) continue; } res &= face->genVolumeBBoxes(*volume, i, - mRelativeXform, mRelativeXformInvTrans, + mRelativeXform, (mVolumeImpl && mVolumeImpl->isVolumeGlobal()) || force_global); if (rebuild) -- cgit v1.3