From a766e26db46c7c054bae1021470dbe365f2a3cb3 Mon Sep 17 00:00:00 2001 From: William Todd Stinson Date: Mon, 10 Sep 2012 10:37:02 -0700 Subject: Backing out the changes contributing to DRTVWR-167 and DRTVWR-179 from the repository. --- indra/newview/llpolymesh.cpp | 210 ++++++++++++++++++------------------------- 1 file changed, 87 insertions(+), 123 deletions(-) (limited to 'indra/newview/llpolymesh.cpp') diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index 70f3b5335e..450f9b2be7 100644 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -129,22 +129,22 @@ void LLPolyMeshSharedData::freeMeshData() { mNumVertices = 0; - ll_aligned_free_16(mBaseCoords); + delete [] mBaseCoords; mBaseCoords = NULL; - ll_aligned_free_16(mBaseNormals); + delete [] mBaseNormals; mBaseNormals = NULL; - ll_aligned_free_16(mBaseBinormals); + delete [] mBaseBinormals; mBaseBinormals = NULL; - ll_aligned_free_16(mTexCoords); + delete [] mTexCoords; mTexCoords = NULL; - ll_aligned_free_16(mDetailTexCoords); + delete [] mDetailTexCoords; mDetailTexCoords = NULL; - ll_aligned_free_16(mWeights); + delete [] mWeights; mWeights = NULL; } @@ -229,19 +229,15 @@ U32 LLPolyMeshSharedData::getNumKB() BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices ) { U32 i; - mBaseCoords = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); - mBaseNormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); - mBaseBinormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); - mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); - mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); - mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32)); + mBaseCoords = new LLVector3[ numVertices ]; + mBaseNormals = new LLVector3[ numVertices ]; + mBaseBinormals = new LLVector3[ numVertices ]; + mTexCoords = new LLVector2[ numVertices ]; + mDetailTexCoords = new LLVector2[ numVertices ]; + mWeights = new F32[ numVertices ]; for (i = 0; i < numVertices; i++) { - mBaseCoords[i].clear(); - mBaseNormals[i].clear(); - mBaseBinormals[i].clear(); - mTexCoords[i].clear(); - mWeights[i] = 0.f; + mWeights[i] = 0.f; } mNumVertices = numVertices; return TRUE; @@ -412,47 +408,39 @@ BOOL LLPolyMeshSharedData::loadMesh( const std::string& fileName ) allocateVertexData( numVertices ); - for (U16 i = 0; i < numVertices; ++i) - { - //---------------------------------------------------------------- - // Coords - //---------------------------------------------------------------- - numRead = fread(&mBaseCoords[i], sizeof(float), 3, fp); - llendianswizzle(&mBaseCoords[i], sizeof(float), 3); - if (numRead != 3) - { - llerrs << "can't read Coordinates from " << fileName << llendl; - return FALSE; - } - } - - for (U16 i = 0; i < numVertices; ++i) - { - //---------------------------------------------------------------- - // Normals - //---------------------------------------------------------------- - numRead = fread(&mBaseNormals[i], sizeof(float), 3, fp); - llendianswizzle(&mBaseNormals[i], sizeof(float), 3); - if (numRead != 3) - { - llerrs << " can't read Normals from " << fileName << llendl; - return FALSE; - } - } - - for (U16 i = 0; i < numVertices; ++i) - { - //---------------------------------------------------------------- - // Binormals - //---------------------------------------------------------------- - numRead = fread(&mBaseBinormals[i], sizeof(float), 3, fp); - llendianswizzle(&mBaseBinormals[i], sizeof(float), 3); - if (numRead != 3) - { - llerrs << " can't read Binormals from " << fileName << llendl; - return FALSE; - } - } + //---------------------------------------------------------------- + // Coords + //---------------------------------------------------------------- + numRead = fread(mBaseCoords, 3*sizeof(float), numVertices, fp); + llendianswizzle(mBaseCoords, sizeof(float), 3*numVertices); + if (numRead != numVertices) + { + llerrs << "can't read Coordinates from " << fileName << llendl; + return FALSE; + } + + //---------------------------------------------------------------- + // Normals + //---------------------------------------------------------------- + numRead = fread(mBaseNormals, 3*sizeof(float), numVertices, fp); + llendianswizzle(mBaseNormals, sizeof(float), 3*numVertices); + if (numRead != numVertices) + { + llerrs << " can't read Normals from " << fileName << llendl; + return FALSE; + } + + //---------------------------------------------------------------- + // Binormals + //---------------------------------------------------------------- + numRead = fread(mBaseBinormals, 3*sizeof(float), numVertices, fp); + llendianswizzle(mBaseBinormals, sizeof(float), 3*numVertices); + if (numRead != numVertices) + { + llerrs << " can't read Binormals from " << fileName << llendl; + return FALSE; + } + //---------------------------------------------------------------- // TexCoords @@ -779,28 +767,21 @@ LLPolyMesh::LLPolyMesh(LLPolyMeshSharedData *shared_data, LLPolyMesh *reference_ { // Allocate memory without initializing every vector // NOTE: This makes asusmptions about the size of LLVector[234] - S32 nverts = mSharedData->mNumVertices; - //make sure it's an even number of verts for alignment - nverts += nverts%2; - S32 nfloats = nverts * ( - 4 + //coords - 4 + //normals - 4 + //weights - 2 + //coords - 4 + //scaled normals - 4 + //binormals - 4); //scaled binormals - + int nverts = mSharedData->mNumVertices; + int nfloats = nverts * (2*4 + 3*3 + 2 + 4); //use 16 byte aligned vertex data to make LLPolyMesh SSE friendly mVertexData = (F32*) ll_aligned_malloc_16(nfloats*4); - S32 offset = 0; - mCoords = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; - mNormals = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; - mClothingWeights = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; - mTexCoords = (LLVector2*)(mVertexData + offset); offset += 2*nverts; - mScaledNormals = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; - mBinormals = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; - mScaledBinormals = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; + int offset = 0; + mCoords = (LLVector4*)(mVertexData + offset); offset += 4*nverts; + mNormals = (LLVector4*)(mVertexData + offset); offset += 4*nverts; + mClothingWeights = (LLVector4*)(mVertexData + offset); offset += 4*nverts; + mTexCoords = (LLVector2*)(mVertexData + offset); offset += 2*nverts; + + // these members don't need to be 16-byte aligned, but the first one might be + // read during an aligned memcpy of mTexCoords + mScaledNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; + mBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; + mScaledBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; initializeForMorph(); } } @@ -925,7 +906,7 @@ void LLPolyMesh::dumpDiagInfo() //----------------------------------------------------------------------------- // getWritableCoords() //----------------------------------------------------------------------------- -LLVector4a *LLPolyMesh::getWritableCoords() +LLVector4 *LLPolyMesh::getWritableCoords() { return mCoords; } @@ -933,7 +914,7 @@ LLVector4a *LLPolyMesh::getWritableCoords() //----------------------------------------------------------------------------- // getWritableNormals() //----------------------------------------------------------------------------- -LLVector4a *LLPolyMesh::getWritableNormals() +LLVector4 *LLPolyMesh::getWritableNormals() { return mNormals; } @@ -941,7 +922,7 @@ LLVector4a *LLPolyMesh::getWritableNormals() //----------------------------------------------------------------------------- // getWritableBinormals() //----------------------------------------------------------------------------- -LLVector4a *LLPolyMesh::getWritableBinormals() +LLVector3 *LLPolyMesh::getWritableBinormals() { return mBinormals; } @@ -950,7 +931,7 @@ LLVector4a *LLPolyMesh::getWritableBinormals() //----------------------------------------------------------------------------- // getWritableClothingWeights() //----------------------------------------------------------------------------- -LLVector4a *LLPolyMesh::getWritableClothingWeights() +LLVector4 *LLPolyMesh::getWritableClothingWeights() { return mClothingWeights; } @@ -966,7 +947,7 @@ LLVector2 *LLPolyMesh::getWritableTexCoords() //----------------------------------------------------------------------------- // getScaledNormals() //----------------------------------------------------------------------------- -LLVector4a *LLPolyMesh::getScaledNormals() +LLVector3 *LLPolyMesh::getScaledNormals() { return mScaledNormals; } @@ -974,7 +955,7 @@ LLVector4a *LLPolyMesh::getScaledNormals() //----------------------------------------------------------------------------- // getScaledBinormals() //----------------------------------------------------------------------------- -LLVector4a *LLPolyMesh::getScaledBinormals() +LLVector3 *LLPolyMesh::getScaledBinormals() { return mScaledBinormals; } @@ -985,17 +966,17 @@ LLVector4a *LLPolyMesh::getScaledBinormals() //----------------------------------------------------------------------------- void LLPolyMesh::initializeForMorph() { - LLVector4a::memcpyNonAliased16((F32*) mCoords, (F32*) mSharedData->mBaseCoords, sizeof(LLVector4a) * mSharedData->mNumVertices); - LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices); - LLVector4a::memcpyNonAliased16((F32*) mScaledNormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices); - LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices); - LLVector4a::memcpyNonAliased16((F32*) mScaledBinormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices); - LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) mSharedData->mTexCoords, sizeof(LLVector2) * (mSharedData->mNumVertices + mSharedData->mNumVertices%2)); - - for (U32 i = 0; i < mSharedData->mNumVertices; ++i) + for (U32 i = 0; i < mSharedData->mNumVertices; ++i) { - mClothingWeights[i].clear(); + mCoords[i] = LLVector4(mSharedData->mBaseCoords[i]); + mNormals[i] = LLVector4(mSharedData->mBaseNormals[i]); } + + memcpy(mScaledNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ + memcpy(mBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ + memcpy(mScaledBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ + memcpy(mTexCoords, mSharedData->mTexCoords, sizeof(LLVector2) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ + memset(mClothingWeights, 0, sizeof(LLVector4) * mSharedData->mNumVertices); } //----------------------------------------------------------------------------- @@ -1117,7 +1098,7 @@ BOOL LLPolySkeletalDistortionInfo::parseXml(LLXmlTreeNode* node) LLPolySkeletalDistortion::LLPolySkeletalDistortion(LLVOAvatar *avatarp) { mAvatar = avatarp; - mDefaultVec.splat(0.001f); + mDefaultVec.setVec(0.001f, 0.001f, 0.001f); } //----------------------------------------------------------------------------- @@ -1190,12 +1171,8 @@ BOOL LLPolySkeletalDistortion::setInfo(LLPolySkeletalDistortionInfo *info) //----------------------------------------------------------------------------- // apply() //----------------------------------------------------------------------------- -static LLFastTimer::DeclareTimer FTM_POLYSKELETAL_DISTORTION_APPLY("Skeletal Distortion"); - void LLPolySkeletalDistortion::apply( ESex avatar_sex ) { - LLFastTimer t(FTM_POLYSKELETAL_DISTORTION_APPLY); - F32 effective_weight = ( getSex() & avatar_sex ) ? mCurWeight : getDefaultWeight(); LLJoint* joint; @@ -1251,14 +1228,11 @@ LLPolyMorphData *clone_morph_param_direction(const LLPolyMorphData *src_data, { LLPolyMorphData* cloned_morph_data = new LLPolyMorphData(*src_data); cloned_morph_data->mName = name; - LLVector4a dir; - dir.load3(direction.mV); - for (U32 v=0; v < cloned_morph_data->mNumIndices; v++) { - cloned_morph_data->mCoords[v] = dir; - cloned_morph_data->mNormals[v].clear(); - cloned_morph_data->mBinormals[v].clear(); + cloned_morph_data->mCoords[v] = direction; + cloned_morph_data->mNormals[v] = LLVector3(0,0,0); + cloned_morph_data->mBinormals[v] = LLVector3(0,0,0); } return cloned_morph_data; } @@ -1269,27 +1243,17 @@ LLPolyMorphData *clone_morph_param_cleavage(const LLPolyMorphData *src_data, { LLPolyMorphData* cloned_morph_data = new LLPolyMorphData(*src_data); cloned_morph_data->mName = name; - - LLVector4a sc; - sc.splat(scale); - - LLVector4a nsc; - nsc.set(scale, -scale, scale, scale); - for (U32 v=0; v < cloned_morph_data->mNumIndices; v++) { - if (cloned_morph_data->mCoords[v][1] < 0) - { - cloned_morph_data->mCoords[v].setMul(src_data->mCoords[v],nsc); - cloned_morph_data->mNormals[v].setMul(src_data->mNormals[v],nsc); - cloned_morph_data->mBinormals[v].setMul(src_data->mBinormals[v],nsc); - } - else - { - cloned_morph_data->mCoords[v].setMul(src_data->mCoords[v],sc); - cloned_morph_data->mNormals[v].setMul(src_data->mNormals[v], sc); - cloned_morph_data->mBinormals[v].setMul(src_data->mBinormals[v],sc); - } + cloned_morph_data->mCoords[v] = src_data->mCoords[v]*scale; + cloned_morph_data->mNormals[v] = src_data->mNormals[v]*scale; + cloned_morph_data->mBinormals[v] = src_data->mBinormals[v]*scale; + if (cloned_morph_data->mCoords[v][1] < 0) + { + cloned_morph_data->mCoords[v][1] *= -1; + cloned_morph_data->mNormals[v][1] *= -1; + cloned_morph_data->mBinormals[v][1] *= -1; + } } return cloned_morph_data; } -- cgit v1.3 From 97d969a338c1e4f973eb817ba7701aff51a02ccb Mon Sep 17 00:00:00 2001 From: Oz Linden Date: Wed, 12 Sep 2012 14:36:37 -0400 Subject: initial attempt to restore changes that make removing tcmalloc possible; not tested --- indra/cmake/Copy3rdPartyLibs.cmake | 9 ++++--- indra/cmake/GooglePerfTools.cmake | 40 +++++++++++++++++++++-------- indra/cmake/LLAddBuildTest.cmake | 9 +++++++ indra/llcommon/llallocator.cpp | 2 +- indra/llcommon/llmemory.cpp | 12 +++++++++ indra/llcommon/llmemory.h | 45 +++++++++++++++++++++++---------- indra/llmath/CMakeLists.txt | 1 + indra/llmath/llcamera.h | 12 ++++----- indra/llmath/llmatrix3a.h | 2 +- indra/llmath/llmatrix4a.h | 2 +- indra/llmath/lloctree.h | 4 +-- indra/llmath/llplane.h | 4 ++- indra/llmath/llsimdmath.h | 3 +-- indra/llmath/llsimdtypes.inl | 2 ++ indra/llmath/llvector4a.cpp | 8 +++++- indra/llmath/llvector4a.h | 6 ++++- indra/llmath/llvector4a.inl | 1 + indra/llmath/llvector4logical.h | 2 ++ indra/llmath/llvolume.cpp | 45 ++++++++++++++------------------- indra/llmath/llvolumeoctree.h | 26 ++++++++++++++++--- indra/llprimitive/llmodel.cpp | 3 ++- indra/newview/CMakeLists.txt | 7 +++-- indra/newview/llappviewerwin32.cpp | 2 ++ indra/newview/lldrawable.h | 17 ++++++++++--- indra/newview/lldynamictexture.h | 12 ++++++++- indra/newview/llface.h | 11 ++++++++ indra/newview/llfloatermodelpreview.cpp | 3 ++- indra/newview/llpolymesh.cpp | 24 +++++++++--------- indra/newview/llspatialpartition.cpp | 3 +++ indra/newview/llspatialpartition.h | 36 ++++++++++++++++++++------ indra/newview/llviewercamera.h | 13 +++++++++- indra/newview/llviewerjointmesh.cpp | 6 +++-- indra/newview/llvoavatar.cpp | 2 +- indra/newview/llvoavatar.h | 12 ++++++++- indra/newview/llvoavatarself.h | 10 ++++++++ indra/newview/viewer_manifest.py | 10 +++++++- 36 files changed, 298 insertions(+), 108 deletions(-) (limited to 'indra/newview/llpolymesh.cpp') diff --git a/indra/cmake/Copy3rdPartyLibs.cmake b/indra/cmake/Copy3rdPartyLibs.cmake index 224e0a8b51..9f05c4cff2 100644 --- a/indra/cmake/Copy3rdPartyLibs.cmake +++ b/indra/cmake/Copy3rdPartyLibs.cmake @@ -57,10 +57,10 @@ if(WINDOWS) libhunspell.dll ) - if(USE_GOOGLE_PERFTOOLS) + if(USE_TCMALLOC) set(debug_files ${debug_files} libtcmalloc_minimal-debug.dll) set(release_files ${release_files} libtcmalloc_minimal.dll) - endif(USE_GOOGLE_PERFTOOLS) + endif(USE_TCMALLOC) if (FMOD) set(debug_files ${debug_files} fmod.dll) @@ -272,13 +272,16 @@ elseif(LINUX) libopenal.so libopenjpeg.so libssl.so - libtcmalloc_minimal.so libuuid.so.16 libuuid.so.16.0.22 libssl.so.1.0.0 libfontconfig.so.1.4.4 ) + if (USE_TCMALLOC) + set(release_files ${release_files} "libtcmalloc_minimal.so") + endif (USE_TCMALLOC) + if (FMOD) set(release_files ${release_files} "libfmod-3.75.so") endif (FMOD) diff --git a/indra/cmake/GooglePerfTools.cmake b/indra/cmake/GooglePerfTools.cmake index d9f91193be..09501e0406 100644 --- a/indra/cmake/GooglePerfTools.cmake +++ b/indra/cmake/GooglePerfTools.cmake @@ -1,20 +1,34 @@ # -*- cmake -*- include(Prebuilt) +# If you want to enable or disable TCMALLOC in viewer builds, this is the place. +# set ON or OFF as desired. +set (USE_TCMALLOC ON) + if (STANDALONE) include(FindGooglePerfTools) else (STANDALONE) if (WINDOWS) - use_prebuilt_binary(tcmalloc) - set(TCMALLOC_LIBRARIES - debug libtcmalloc_minimal-debug - optimized libtcmalloc_minimal) + if (USE_TCMALLOC) + use_prebuilt_binary(tcmalloc) + set(TCMALLOC_LIBRARIES + debug libtcmalloc_minimal-debug + optimized libtcmalloc_minimal) + set(TCMALLOC_LINK_FLAGS "/INCLUDE:__tcmalloc") + else (USE_TCMALLOC) + set(TCMALLOC_LIBRARIES) + set(TCMALLOC_LINK_FLAGS) + endif (USE_TCMALLOC) set(GOOGLE_PERFTOOLS_FOUND "YES") endif (WINDOWS) if (LINUX) - use_prebuilt_binary(tcmalloc) - set(TCMALLOC_LIBRARIES - tcmalloc) + if (USE_TCMALLOC) + use_prebuilt_binary(tcmalloc) + set(TCMALLOC_LIBRARIES + tcmalloc) + else (USE_TCMALLOC) + set(TCMALLOC_LIBRARIES) + endif (USE_TCMALLOC) set(PROFILER_LIBRARIES profiler) set(GOOGLE_PERFTOOLS_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include) @@ -29,13 +43,19 @@ if (GOOGLE_PERFTOOLS_FOUND) endif (GOOGLE_PERFTOOLS_FOUND) if (WINDOWS) - set(USE_GOOGLE_PERFTOOLS ON) + set(USE_GOOGLE_PERFTOOLS ON) endif (WINDOWS) if (USE_GOOGLE_PERFTOOLS) - set(TCMALLOC_FLAG -ULL_USE_TCMALLOC=1) + if (USE_TCMALLOC) + set(TCMALLOC_FLAG -DLL_USE_TCMALLOC=1) + else (USE_TCMALLOC) + set(TCMALLOC_FLAG -ULL_USE_TCMALLOC) + endif (USE_TCMALLOC) +endif (USE_GOOGLE_PERFTOOLS) + +if (USE_GOOGLE_PERFTOOLS) include_directories(${GOOGLE_PERFTOOLS_INCLUDE_DIR}) set(GOOGLE_PERFTOOLS_LIBRARIES ${TCMALLOC_LIBRARIES} ${STACKTRACE_LIBRARIES} ${PROFILER_LIBRARIES}) else (USE_GOOGLE_PERFTOOLS) - set(TCMALLOC_FLAG -ULL_USE_TCMALLOC) endif (USE_GOOGLE_PERFTOOLS) diff --git a/indra/cmake/LLAddBuildTest.cmake b/indra/cmake/LLAddBuildTest.cmake index 03ce46781c..543075db5b 100755 --- a/indra/cmake/LLAddBuildTest.cmake +++ b/indra/cmake/LLAddBuildTest.cmake @@ -214,6 +214,15 @@ FUNCTION(LL_ADD_INTEGRATION_TEST SET_TARGET_PROPERTIES(INTEGRATION_TEST_${testname} PROPERTIES COMPILE_FLAGS -I"${TUT_INCLUDE_DIR}") endif(STANDALONE) + if (WINDOWS) + SET_TARGET_PROPERTIES(INTEGRATION_TEST_${testname} + PROPERTIES + LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS ${TCMALLOC_LINK_FLAGS}" + LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO" + LINK_FLAGS_RELEASE "" + ) + endif (WINDOWS) + # Add link deps to the executable if(TEST_DEBUG) message(STATUS "TARGET_LINK_LIBRARIES(INTEGRATION_TEST_${testname} ${libraries})") diff --git a/indra/llcommon/llallocator.cpp b/indra/llcommon/llallocator.cpp index 6f6abefc67..87654b5b97 100644 --- a/indra/llcommon/llallocator.cpp +++ b/indra/llcommon/llallocator.cpp @@ -27,7 +27,7 @@ #include "linden_common.h" #include "llallocator.h" -#if LL_USE_TCMALLOC +#if (LL_USE_TCMALLOC && LL_USE_HEAP_PROFILER) #include "google/heap-profiler.h" #include "google/commandlineflags_public.h" diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp index 3b9758f996..afaf366668 100644 --- a/indra/llcommon/llmemory.cpp +++ b/indra/llcommon/llmemory.cpp @@ -61,6 +61,18 @@ BOOL LLMemory::sEnableMemoryFailurePrevention = FALSE; LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sMemAllocationTracker; #endif +void ll_assert_aligned_func(uintptr_t ptr,U32 alignment) +{ +#ifdef SHOW_ASSERT + // Redundant, place to set breakpoints. + if (ptr%alignment!=0) + { + llwarns << "alignment check failed" << llendl; + } + llassert(ptr%alignment==0); +#endif +} + //static void LLMemory::initClass() { diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index bbbdaa6497..9dd776ff57 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -27,7 +27,6 @@ #define LLMEMORY_H #include "llmemtype.h" -#if LL_DEBUG inline void* ll_aligned_malloc( size_t size, int align ) { void* mem = malloc( size + (align - 1) + sizeof(void*) ); @@ -43,10 +42,11 @@ inline void ll_aligned_free( void* ptr ) free( ((void**)ptr)[-1] ); } +#if !LL_USE_TCMALLOC inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16(). { #if defined(LL_WINDOWS) - return _mm_malloc(size, 16); + return _aligned_malloc(size, 16); #elif defined(LL_DARWIN) return malloc(size); // default osx malloc is 16 byte aligned. #else @@ -58,21 +58,38 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi #endif } +inline void* ll_aligned_realloc_16(void* ptr, size_t size) // returned hunk MUST be freed with ll_aligned_free_16(). +{ +#if defined(LL_WINDOWS) + return _aligned_realloc(ptr, size, 16); +#elif defined(LL_DARWIN) + return realloc(ptr,size); // default osx malloc is 16 byte aligned. +#else + return realloc(ptr,size); // FIXME not guaranteed to be aligned. +#endif +} + inline void ll_aligned_free_16(void *p) { #if defined(LL_WINDOWS) - _mm_free(p); + _aligned_free(p); #elif defined(LL_DARWIN) return free(p); #else free(p); // posix_memalign() is compatible with heap deallocator #endif } +#else // USE_TCMALLOC +// ll_aligned_foo_16 are not needed with tcmalloc +#define ll_aligned_malloc_16 malloc +#define ll_aligned_realloc_16 realloc +#define ll_aligned_free_16 free +#endif // USE_TCMALLOC inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). { #if defined(LL_WINDOWS) - return _mm_malloc(size, 32); + return _aligned_malloc(size, 32); #elif defined(LL_DARWIN) return ll_aligned_malloc( size, 32 ); #else @@ -87,22 +104,13 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi inline void ll_aligned_free_32(void *p) { #if defined(LL_WINDOWS) - _mm_free(p); + _aligned_free(p); #elif defined(LL_DARWIN) ll_aligned_free( p ); #else free(p); // posix_memalign() is compatible with heap deallocator #endif } -#else // LL_DEBUG -// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals) -#define ll_aligned_malloc( size, align ) malloc(size) -#define ll_aligned_free( ptr ) free(ptr) -#define ll_aligned_malloc_16 malloc -#define ll_aligned_free_16 free -#define ll_aligned_malloc_32 malloc -#define ll_aligned_free_32 free -#endif // LL_DEBUG #ifndef __DEBUG_PRIVATE_MEM__ #define __DEBUG_PRIVATE_MEM__ 0 @@ -512,4 +520,13 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr) // LLSingleton moved to llsingleton.h +LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); + +#ifdef SHOW_ASSERT +#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) +#else +#define ll_assert_aligned(ptr,alignment) +#endif + + #endif diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index b5e59c1ca3..5865ae030c 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -117,6 +117,7 @@ if (LL_TESTS) # INTEGRATION TESTS set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES}) # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests. + LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}") LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}") diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index ec67b91d05..0b591be622 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; // roll(), pitch(), yaw() // etc... - +LL_ALIGN_PREFIX(16) class LLCamera : public LLCoordFrame { @@ -108,7 +108,7 @@ public: }; private: - LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + LL_ALIGN_16(LLPlane mAgentPlanes[7]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP U8 mPlaneMask[8]; // 8 for alignment F32 mView; // angle between top and bottom frustum planes in radians. @@ -116,13 +116,13 @@ private: S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - LLPlane mLocalPlanes[4]; + LL_ALIGN_16(LLPlane mLocalPlanes[4]); F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LLPlane mWorldPlanes[PLANE_NUM]; - LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; + LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); + LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -208,7 +208,7 @@ protected: void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); void calculateWorldFrustumPlanes(); -}; +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index adb7e3389d..9916cfd2da 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public: protected: - LLVector4a mColumns[3]; + LL_ALIGN_16(LLVector4a mColumns[3]); }; diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 27cf5b79f6..c4cefdb4fa 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@ class LLMatrix4a { public: - LLVector4a mMatrix[4]; + LL_ALIGN_16(LLVector4a mMatrix[4]); inline void clear() { diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 1b11e83b4a..6c7744cdf1 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -88,7 +88,7 @@ public: typedef LLOctreeNode oct_node; typedef LLOctreeListener oct_listener; - /*void* operator new(size_t size) + void* operator new(size_t size) { return ll_aligned_malloc_16(size); } @@ -96,7 +96,7 @@ public: void operator delete(void* ptr) { ll_aligned_free_16(ptr); - }*/ + } LLOctreeNode( const LLVector4a& center, const LLVector4a& size, diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index a611894721..3c32441b11 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,6 +36,8 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) + +LL_ALIGN_PREFIX(16) class LLPlane { public: @@ -94,7 +96,7 @@ public: private: LLVector4a mV; -}; +} LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index c7cdf7b32c..01458521ec 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -67,11 +67,10 @@ template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - - #include #include +#include "llmemory.h" #include "llsimdtypes.h" #include "llsimdtypes.inl" diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl index 712239e425..e905c84954 100644 --- a/indra/llmath/llsimdtypes.inl +++ b/indra/llmath/llsimdtypes.inl @@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) inline LLSimdScalar operator-(const LLSimdScalar& a) { static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + ll_assert_aligned(signMask,16); return _mm_xor_ps(*reinterpret_cast(signMask), a); } @@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) inline LLSimdScalar LLSimdScalar::getAbs() const { static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + ll_assert_aligned(F_ABS_MASK_4A,16); return _mm_and_ps( mQ, *reinterpret_cast(F_ABS_MASK_4A)); } diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b66b7a7076..6edeb0fefe 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,6 +24,7 @@ * $/LicenseInfo$ */ +#include "llmemory.h" #include "llmath.h" #include "llquantize.h" @@ -44,7 +45,10 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F assert(dst != NULL); assert(bytes > 0); assert((bytes % sizeof(F32))== 0); - + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert(bytes%16==0); + F32* end = dst + (bytes / sizeof(F32) ); if (bytes > 64) @@ -189,6 +193,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) LLVector4a oneOverDelta; { static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; + ll_assert_aligned(F_TWO_4A,16); + LLVector4a two; two.load4a( F_TWO_4A ); // Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509d..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation; #include #include "llpreprocessor.h" +#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -46,6 +47,7 @@ class LLRotation; // LLVector3/LLVector4. ///////////////////////////////// +LL_ALIGN_PREFIX(16) class LLVector4a { public: @@ -82,6 +84,7 @@ public: } // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. + // Source and dest must be 16-byte aligned and size must be multiple of 16. static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); //////////////////////////////////// @@ -90,6 +93,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary + ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) @@ -313,7 +317,7 @@ public: private: LLQuad mQ; -}; +} LL_ALIGN_POSTFIX(16); inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7ad22a5631..7c52ffef21 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -475,6 +475,7 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3 inline LLBool32 LLVector4a::isFinite3() const { static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + ll_assert_aligned(nanOrInfMask,16); const __m128i nanOrInfMaskV = *reinterpret_cast (nanOrInfMask); const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index dd66b09d43..c5698f7cea 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,6 +27,7 @@ #ifndef LL_VECTOR4LOGICAL_H #define LL_VECTOR4LOGICAL_H +#include "llmemory.h" //////////////////////////// // LLVector4Logical @@ -77,6 +78,7 @@ public: inline LLVector4Logical& invert() { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + ll_assert_aligned(allOnes,16); mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); return *this; } diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cc9744756f..11fa7080ce 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -95,17 +95,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; -void assert_aligned(void* ptr, uintptr_t alignment) -{ -#if 0 - uintptr_t t = (uintptr_t) ptr; - if (t%alignment != 0) - { - llerrs << "Alignment check failed." << llendl; - } -#endif -} - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -6962,14 +6951,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts) if (num_verts) { mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mPositions, 16); + ll_assert_aligned(mPositions, 16); mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mNormals, 16); + ll_assert_aligned(mNormals, 16); //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - assert_aligned(mTexCoords, 16); + ll_assert_aligned(mTexCoords, 16); } else { @@ -6993,14 +6982,17 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con // S32 old_size = mNumVertices*16; //positions - mPositions = (LLVector4a*) realloc(mPositions, new_size); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size); + ll_assert_aligned(mPositions,16); //normals - mNormals = (LLVector4a*) realloc(mNormals, new_size); - + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size); + ll_assert_aligned(mNormals,16); + //tex coords new_size = ((new_verts*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size); + ll_assert_aligned(mTexCoords,16); //just clear binormals @@ -7053,7 +7045,8 @@ void LLVolumeFace::pushIndex(const U16& idx) S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; if (new_size != old_size) { - mIndices = (U16*) realloc(mIndices, new_size); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size); + ll_assert_aligned(mIndices,16); } mIndices[mNumIndices++] = idx; @@ -7094,12 +7087,12 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat } //allocate new buffer space - mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); - assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); - assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); - assert_aligned(mTexCoords, 16); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a)); + ll_assert_aligned(mPositions, 16); + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a)); + ll_assert_aligned(mNormals, 16); + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); + ll_assert_aligned(mTexCoords, 16); mNumVertices = new_count; @@ -7145,7 +7138,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat new_count = mNumIndices + face.mNumIndices; //allocate new index buffer - mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); //get destination address into new index buffer U16* dst_idx = mIndices+mNumIndices; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 688d91dc40..dac97b14b5 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -37,6 +37,16 @@ class LLVolumeTriangle : public LLRefCount { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeTriangle() { @@ -58,7 +68,7 @@ public: } - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mPositionGroup); const LLVector4a* mV[3]; U16 mIndex[3]; @@ -73,6 +83,16 @@ class LLVolumeOctreeListener : public LLOctreeListener { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeOctreeListener(LLOctreeNode* node); ~LLVolumeOctreeListener(); @@ -99,8 +119,8 @@ public: public: - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children }; class LLOctreeTriangleRayIntersect : public LLOctreeTraveler diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index cb32a510b8..28ed051c55 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -1026,7 +1026,8 @@ void LLModel::setVolumeFaceData( if (tc.get()) { - LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), num_verts*2*sizeof(F32)); + U32 tex_size = (num_verts*2*sizeof(F32)+0xF)&~0xF; + LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), tex_size); } else { diff --git a/indra/newview/CMakeLists.txt b/indra/newview/CMakeLists.txt index c0a252637f..2ae162a623 100644 --- a/indra/newview/CMakeLists.txt +++ b/indra/newview/CMakeLists.txt @@ -1565,8 +1565,7 @@ if (WINDOWS) set_target_properties(${VIEWER_BINARY_NAME} PROPERTIES # *TODO -reenable this once we get server usage sorted out - #LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:\"__tcmalloc\"" - LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:__tcmalloc " + LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS " LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO" LINK_FLAGS_RELEASE "/FORCE:MULTIPLE /MAP\"secondlife-bin.MAP\" /OPT:REF" ) @@ -1585,7 +1584,7 @@ if (WINDOWS) # In the meantime, if you have any ideas on how to easily maintain one list, either here or in viewer_manifest.py # and have the build deps get tracked *please* tell me about it. - if(USE_GOOGLE_PERFTOOLS) + if(USE_TCMALLOC) # Configure a var for tcmalloc location, if used. # Note the need to specify multiple names explicitly. set(GOOGLE_PERF_TOOLS_SOURCE @@ -1593,7 +1592,7 @@ if (WINDOWS) ${SHARED_LIB_STAGING_DIR}/RelWithDebInfo/libtcmalloc_minimal.dll ${SHARED_LIB_STAGING_DIR}/Debug/libtcmalloc_minimal-debug.dll ) - endif(USE_GOOGLE_PERFTOOLS) + endif(USE_TCMALLOC) set(COPY_INPUT_DEPENDENCIES diff --git a/indra/newview/llappviewerwin32.cpp b/indra/newview/llappviewerwin32.cpp index bad60a9757..53c77fa22e 100644 --- a/indra/newview/llappviewerwin32.cpp +++ b/indra/newview/llappviewerwin32.cpp @@ -130,6 +130,8 @@ int APIENTRY WINMAIN(HINSTANCE hInstance, // This results in a 2-3x improvement in opening a new Inventory window (which uses a large numebr of allocations) // Note: This won't work when running from the debugger unless the _NO_DEBUG_HEAP environment variable is set to 1 + // Enable to get mem debugging within visual studio. + //_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); _CrtSetDbgFlag(0); // default, just making explicit ULONG ulEnableLFH = 2; diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h index e2064b79f8..8c7db61502 100644 --- a/indra/newview/lldrawable.h +++ b/indra/newview/lldrawable.h @@ -59,6 +59,7 @@ class LLViewerTexture; const U32 SILHOUETTE_HIGHLIGHT = 0; // All data for new renderer goes into this class. +LL_ALIGN_PREFIX(16) class LLDrawable : public LLRefCount { public: @@ -75,6 +76,16 @@ public: static void initClass(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLDrawable() { init(); } MEM_TYPE_NEW(LLMemType::MTYPE_DRAWABLE); @@ -281,8 +292,8 @@ public: } EDrawableFlags; private: //aligned members - LLVector4a mExtents[2]; - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mExtents[2]); + LL_ALIGN_16(LLVector4a mPositionGroup); public: LLXformMatrix mXform; @@ -323,7 +334,7 @@ private: static U32 sNumZombieDrawables; static LLDynamicArrayPtr > sDeadList; -}; +} LL_ALIGN_POSTFIX(16); inline LLFace* LLDrawable::getFace(const S32 i) const diff --git a/indra/newview/lldynamictexture.h b/indra/newview/lldynamictexture.h index e18090545d..c51e7d1e1a 100644 --- a/indra/newview/lldynamictexture.h +++ b/indra/newview/lldynamictexture.h @@ -36,6 +36,16 @@ class LLViewerDynamicTexture : public LLViewerTexture { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + enum { LL_VIEWER_DYNAMIC_TEXTURE = LLViewerTexture::DYNAMIC_TEXTURE, @@ -85,7 +95,7 @@ protected: protected: BOOL mClamp; LLCoordGL mOrigin; - LLCamera mCamera; + LL_ALIGN_16(LLCamera mCamera); typedef std::set instance_list_t; static instance_list_t sInstances[ LLViewerDynamicTexture::ORDER_COUNT ]; diff --git a/indra/newview/llface.h b/indra/newview/llface.h index 76ea5c853a..5dca27487f 100644 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -59,6 +59,17 @@ class LLFace { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + + LLFace(const LLFace& rhs) { *this = rhs; diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp index 3fe535cbe8..a071f338ba 100755 --- a/indra/newview/llfloatermodelpreview.cpp +++ b/indra/newview/llfloatermodelpreview.cpp @@ -4774,7 +4774,8 @@ void LLModelPreview::genBuffers(S32 lod, bool include_skin_weights) if (vf.mTexCoords) { vb->getTexCoord0Strider(tc_strider); - LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32)); + S32 tex_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, tex_size); } if (vf.mNormals) diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index 450f9b2be7..0860506086 100644 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -129,22 +129,22 @@ void LLPolyMeshSharedData::freeMeshData() { mNumVertices = 0; - delete [] mBaseCoords; + ll_aligned_free_16(mBaseCoords); mBaseCoords = NULL; - delete [] mBaseNormals; + ll_aligned_free_16(mBaseNormals); mBaseNormals = NULL; - delete [] mBaseBinormals; + ll_aligned_free_16(mBaseBinormals); mBaseBinormals = NULL; - delete [] mTexCoords; + ll_aligned_free_16(mTexCoords); mTexCoords = NULL; - delete [] mDetailTexCoords; + ll_aligned_free_16(mDetailTexCoords); mDetailTexCoords = NULL; - delete [] mWeights; + ll_aligned_free_16(mWeights); mWeights = NULL; } @@ -229,12 +229,12 @@ U32 LLPolyMeshSharedData::getNumKB() BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices ) { U32 i; - mBaseCoords = new LLVector3[ numVertices ]; - mBaseNormals = new LLVector3[ numVertices ]; - mBaseBinormals = new LLVector3[ numVertices ]; - mTexCoords = new LLVector2[ numVertices ]; - mDetailTexCoords = new LLVector2[ numVertices ]; - mWeights = new F32[ numVertices ]; + mBaseCoords = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); + mBaseNormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); + mBaseBinormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); + mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); + mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); + mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32)); for (i = 0; i < numVertices; i++) { mWeights[i] = 0.f; diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 325a2d3004..45ef8f1a6d 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -529,6 +529,7 @@ void LLSpatialGroup::setVisible() void LLSpatialGroup::validate() { + ll_assert_aligned(this,64); #if LL_OCTREE_PARANOIA_CHECK sg_assert(!isState(DIRTY)); @@ -1195,6 +1196,8 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) : mCurUpdatingSlotp(NULL), mCurUpdatingTexture (NULL) { + ll_assert_aligned(this,16); + sNodeCount++; LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index f0e4f15a83..7968c28900 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -68,6 +68,16 @@ protected: ~LLDrawInfo(); public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLDrawInfo(const LLDrawInfo& rhs) { @@ -106,7 +116,7 @@ public: F32 mPartSize; F32 mVSize; LLSpatialGroup* mGroup; - LLFace* mFace; //associated face + LL_ALIGN_16(LLFace* mFace); //associated face F32 mDistance; U32 mDrawMode; @@ -181,7 +191,7 @@ public: }; }; -LL_ALIGN_PREFIX(64) +LL_ALIGN_PREFIX(16) class LLSpatialGroup : public LLOctreeListener { friend class LLSpatialPartition; @@ -193,6 +203,16 @@ public: *this = rhs; } + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + const LLSpatialGroup& operator=(const LLSpatialGroup& rhs) { llerrs << "Illegal operation!" << llendl; @@ -370,12 +390,12 @@ public: V4_COUNT = 10 } eV4Index; - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children - LLVector4a mObjectExtents[2]; // extents (min, max) of objects in this node - LLVector4a mObjectBounds[2]; // bounding box (center, size) of objects in this node - LLVector4a mViewAngle; - LLVector4a mLastUpdateViewAngle; + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mObjectExtents[2]); // extents (min, max) of objects in this node + LL_ALIGN_16(LLVector4a mObjectBounds[2]); // bounding box (center, size) of objects in this node + LL_ALIGN_16(LLVector4a mViewAngle); + LL_ALIGN_16(LLVector4a mLastUpdateViewAngle); F32 mObjectBoxSize; //cached mObjectBounds[1].getLength3() diff --git a/indra/newview/llviewercamera.h b/indra/newview/llviewercamera.h index 184033de42..b857c7fe89 100644 --- a/indra/newview/llviewercamera.h +++ b/indra/newview/llviewercamera.h @@ -51,9 +51,19 @@ const BOOL NOT_FOR_SELECTION = FALSE; extern template class LLViewerCamera* LLSingleton::getInstance(); #endif +LL_ALIGN_PREFIX(16) class LLViewerCamera : public LLCamera, public LLSingleton { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } typedef enum { @@ -137,6 +147,7 @@ protected: S16 mZoomSubregion; public: -}; +} LL_ALIGN_POSTFIX(16); + #endif // LL_LLVIEWERCAMERA_H diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index f029ae5302..5d1aa870a3 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -729,8 +729,10 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w F32* vw = (F32*) vertex_weightsp.get(); F32* cw = (F32*) clothing_weightsp.get(); - LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32)); - LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32)); + S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size); + S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size); LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32)); } diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp index 33dc12c473..1b7009a5c2 100644 --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -2692,7 +2692,7 @@ void LLVOAvatar::idleUpdateMisc(bool detailed_update) if (isImpostor() && !mNeedsImpostorUpdate) { - LLVector4a ext[2]; + LL_ALIGN_16(LLVector4a ext[2]); F32 distance; LLVector3 angle; diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h index 6fb56a4c0b..4081a1408d 100644 --- a/indra/newview/llvoavatar.h +++ b/indra/newview/llvoavatar.h @@ -93,6 +93,16 @@ protected: **/ public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVOAvatar(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp); virtual void markDead(); static void initClass(); // Initialize data that's only init'd once per class. @@ -215,7 +225,7 @@ public: bool isBuilt() const { return mIsBuilt; } private: //aligned members - LLVector4a mImpostorExtents[2]; + LL_ALIGN_16(LLVector4a mImpostorExtents[2]); private: BOOL mSupportsAlphaLayers; // For backwards compatibility, TRUE for 1.23+ clients diff --git a/indra/newview/llvoavatarself.h b/indra/newview/llvoavatarself.h index 543891ca63..2b273e616c 100644 --- a/indra/newview/llvoavatarself.h +++ b/indra/newview/llvoavatarself.h @@ -49,6 +49,16 @@ class LLVOAvatarSelf : **/ public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVOAvatarSelf(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp); virtual ~LLVOAvatarSelf(); virtual void markDead(); diff --git a/indra/newview/viewer_manifest.py b/indra/newview/viewer_manifest.py index 7c6b5403e1..894d2f0925 100644 --- a/indra/newview/viewer_manifest.py +++ b/indra/newview/viewer_manifest.py @@ -1080,7 +1080,15 @@ class Linux_i686Manifest(LinuxManifest): # previous call did, without having to explicitly state the # version number. self.path("libfontconfig.so.*.*") - self.path("libtcmalloc.so*") #formerly called google perf tools + try: + self.path("libtcmalloc.so", "libtcmalloc.so") #formerly called google perf tools + self.path("libtcmalloc.so.0", "libtcmalloc.so.0") #formerly called google perf tools + self.path("libtcmalloc.so.0.1.0", "libtcmalloc.so.0.1.0") #formerly called google perf tools + pass + except: + print "tcmalloc files not found, skipping" + pass + try: self.path("libfmod-3.75.so") pass -- cgit v1.3 From 55e1557b8af1c02d6b50214f9f24d5a384aa3b3a Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 20 Sep 2012 09:54:49 -0400 Subject: reapply 668dcacd6e76: MAINT-646 Vectorize LLPolyMesh --- indra/llcharacter/llcharacter.cpp | 7 ++ indra/llcharacter/llmotioncontroller.cpp | 12 +- indra/newview/lldriverparam.cpp | 26 +++-- indra/newview/lldriverparam.h | 10 +- indra/newview/llpolymesh.cpp | 191 ++++++++++++++++++------------- indra/newview/llpolymesh.h | 52 ++++----- indra/newview/llpolymorph.cpp | 165 +++++++++++++++----------- indra/newview/llpolymorph.h | 18 +-- indra/newview/lltexlayerparams.h | 20 ++-- indra/newview/llviewervisualparam.h | 8 +- 10 files changed, 295 insertions(+), 214 deletions(-) (limited to 'indra/newview/llpolymesh.cpp') diff --git a/indra/llcharacter/llcharacter.cpp b/indra/llcharacter/llcharacter.cpp index 07f0baae51..0a6a8f9fa6 100644 --- a/indra/llcharacter/llcharacter.cpp +++ b/indra/llcharacter/llcharacter.cpp @@ -187,14 +187,20 @@ void LLCharacter::requestStopMotion( LLMotion* motion) //----------------------------------------------------------------------------- // updateMotions() //----------------------------------------------------------------------------- +static LLFastTimer::DeclareTimer FTM_UPDATE_ANIMATION("Update Animation"); +static LLFastTimer::DeclareTimer FTM_UPDATE_HIDDEN_ANIMATION("Update Hidden Anim"); +static LLFastTimer::DeclareTimer FTM_UPDATE_MOTIONS("Update Motions"); + void LLCharacter::updateMotions(e_update_t update_type) { if (update_type == HIDDEN_UPDATE) { + LLFastTimer t(FTM_UPDATE_HIDDEN_ANIMATION); mMotionController.updateMotionsMinimal(); } else { + LLFastTimer t(FTM_UPDATE_ANIMATION); // unpause if the number of outstanding pause requests has dropped to the initial one if (mMotionController.isPaused() && mPauseRequest->getNumRefs() == 1) { @@ -202,6 +208,7 @@ void LLCharacter::updateMotions(e_update_t update_type) } bool force_update = (update_type == FORCE_UPDATE); { + LLFastTimer t(FTM_UPDATE_MOTIONS); mMotionController.updateMotions(force_update); } } diff --git a/indra/llcharacter/llmotioncontroller.cpp b/indra/llcharacter/llmotioncontroller.cpp index 236b8c616d..4f6351709e 100644 --- a/indra/llcharacter/llmotioncontroller.cpp +++ b/indra/llcharacter/llmotioncontroller.cpp @@ -542,6 +542,8 @@ void LLMotionController::updateIdleActiveMotions() //----------------------------------------------------------------------------- // updateMotionsByType() //----------------------------------------------------------------------------- +static LLFastTimer::DeclareTimer FTM_MOTION_ON_UPDATE("Motion onUpdate"); + void LLMotionController::updateMotionsByType(LLMotion::LLMotionBlendType anim_type) { BOOL update_result = TRUE; @@ -699,7 +701,10 @@ void LLMotionController::updateMotionsByType(LLMotion::LLMotionBlendType anim_ty } // perform motion update - update_result = motionp->onUpdate(mAnimTime - motionp->mActivationTimestamp, last_joint_signature); + { + LLFastTimer t(FTM_MOTION_ON_UPDATE); + update_result = motionp->onUpdate(mAnimTime - motionp->mActivationTimestamp, last_joint_signature); + } } //********************** @@ -862,11 +867,12 @@ void LLMotionController::updateMotions(bool force_update) { // update additive motions updateAdditiveMotions(); + resetJointSignatures(); - + // update all regular motions updateRegularMotions(); - + if (use_quantum) { mPoseBlender.blendAndCache(TRUE); diff --git a/indra/newview/lldriverparam.cpp b/indra/newview/lldriverparam.cpp index 64eb11fc9b..885cae1737 100644 --- a/indra/newview/lldriverparam.cpp +++ b/indra/newview/lldriverparam.cpp @@ -155,6 +155,7 @@ LLDriverParam::LLDriverParam(LLVOAvatar *avatarp) : mAvatarp(avatarp), mWearablep(NULL) { + mDefaultVec.clear(); } LLDriverParam::LLDriverParam(LLWearable *wearablep) : @@ -162,6 +163,7 @@ LLDriverParam::LLDriverParam(LLWearable *wearablep) : mAvatarp(NULL), mWearablep(wearablep) { + mDefaultVec.clear(); } LLDriverParam::~LLDriverParam() @@ -341,18 +343,19 @@ F32 LLDriverParam::getTotalDistortion() return sum; } -const LLVector3 &LLDriverParam::getAvgDistortion() +const LLVector4a &LLDriverParam::getAvgDistortion() { // It's not actually correct to take the average of averages, but it good enough here. - LLVector3 sum; + LLVector4a sum; + sum.clear(); S32 count = 0; for( entry_list_t::iterator iter = mDriven.begin(); iter != mDriven.end(); iter++ ) { LLDrivenEntry* driven = &(*iter); - sum += driven->mParam->getAvgDistortion(); + sum.add(driven->mParam->getAvgDistortion()); count++; } - sum /= (F32)count; + sum.mul( 1.f/(F32)count); mDefaultVec = sum; return mDefaultVec; @@ -375,21 +378,22 @@ F32 LLDriverParam::getMaxDistortion() } -LLVector3 LLDriverParam::getVertexDistortion(S32 index, LLPolyMesh *poly_mesh) +LLVector4a LLDriverParam::getVertexDistortion(S32 index, LLPolyMesh *poly_mesh) { - LLVector3 sum; + LLVector4a sum; + sum.clear(); for( entry_list_t::iterator iter = mDriven.begin(); iter != mDriven.end(); iter++ ) { LLDrivenEntry* driven = &(*iter); - sum += driven->mParam->getVertexDistortion( index, poly_mesh ); + sum.add(driven->mParam->getVertexDistortion( index, poly_mesh )); } return sum; } -const LLVector3* LLDriverParam::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) +const LLVector4a* LLDriverParam::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) { mCurrentDistortionParam = NULL; - const LLVector3* v = NULL; + const LLVector4a* v = NULL; for( entry_list_t::iterator iter = mDriven.begin(); iter != mDriven.end(); iter++ ) { LLDrivenEntry* driven = &(*iter); @@ -404,7 +408,7 @@ const LLVector3* LLDriverParam::getFirstDistortion(U32 *index, LLPolyMesh **poly return v; }; -const LLVector3* LLDriverParam::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) +const LLVector4a* LLDriverParam::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) { llassert( mCurrentDistortionParam ); if( !mCurrentDistortionParam ) @@ -432,7 +436,7 @@ const LLVector3* LLDriverParam::getNextDistortion(U32 *index, LLPolyMesh **poly_ } // We're already in the middle of a param's distortions, so get the next one. - const LLVector3* v = driven->mParam->getNextDistortion( index, poly_mesh ); + const LLVector4a* v = driven->mParam->getNextDistortion( index, poly_mesh ); if( (!v) && (iter != mDriven.end()) ) { // This param is finished, so start the next param. It might not have any diff --git a/indra/newview/lldriverparam.h b/indra/newview/lldriverparam.h index fb1b44458c..7a4d711d4e 100644 --- a/indra/newview/lldriverparam.h +++ b/indra/newview/lldriverparam.h @@ -105,18 +105,18 @@ public: // LLViewerVisualParam Virtual functions /*virtual*/ F32 getTotalDistortion(); - /*virtual*/ const LLVector3& getAvgDistortion(); + /*virtual*/ const LLVector4a& getAvgDistortion(); /*virtual*/ F32 getMaxDistortion(); - /*virtual*/ LLVector3 getVertexDistortion(S32 index, LLPolyMesh *poly_mesh); - /*virtual*/ const LLVector3* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh); - /*virtual*/ const LLVector3* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh); + /*virtual*/ LLVector4a getVertexDistortion(S32 index, LLPolyMesh *poly_mesh); + /*virtual*/ const LLVector4a* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh); + /*virtual*/ const LLVector4a* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh); protected: F32 getDrivenWeight(const LLDrivenEntry* driven, F32 input_weight); void setDrivenWeight(LLDrivenEntry *driven, F32 driven_weight, bool upload_bake); - LLVector3 mDefaultVec; // temp holder + LLVector4a mDefaultVec; // temp holder typedef std::vector entry_list_t; entry_list_t mDriven; LLViewerVisualParam* mCurrentDistortionParam; diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index 0860506086..c6b9aaae4b 100644 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -229,15 +229,18 @@ U32 LLPolyMeshSharedData::getNumKB() BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices ) { U32 i; - mBaseCoords = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); - mBaseNormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); - mBaseBinormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); + mBaseCoords = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); + mBaseNormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); + mBaseBinormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a)); mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32)); for (i = 0; i < numVertices; i++) { - mWeights[i] = 0.f; + mBaseCoords[i].clear(); + mBaseNormals[i].clear(); + mBaseBinormals[i].clear(); + mWeights[i] = 0.f; } mNumVertices = numVertices; return TRUE; @@ -408,39 +411,47 @@ BOOL LLPolyMeshSharedData::loadMesh( const std::string& fileName ) allocateVertexData( numVertices ); - //---------------------------------------------------------------- - // Coords - //---------------------------------------------------------------- - numRead = fread(mBaseCoords, 3*sizeof(float), numVertices, fp); - llendianswizzle(mBaseCoords, sizeof(float), 3*numVertices); - if (numRead != numVertices) - { - llerrs << "can't read Coordinates from " << fileName << llendl; - return FALSE; - } - - //---------------------------------------------------------------- - // Normals - //---------------------------------------------------------------- - numRead = fread(mBaseNormals, 3*sizeof(float), numVertices, fp); - llendianswizzle(mBaseNormals, sizeof(float), 3*numVertices); - if (numRead != numVertices) - { - llerrs << " can't read Normals from " << fileName << llendl; - return FALSE; - } - - //---------------------------------------------------------------- - // Binormals - //---------------------------------------------------------------- - numRead = fread(mBaseBinormals, 3*sizeof(float), numVertices, fp); - llendianswizzle(mBaseBinormals, sizeof(float), 3*numVertices); - if (numRead != numVertices) - { - llerrs << " can't read Binormals from " << fileName << llendl; - return FALSE; - } - + for (U16 i = 0; i < numVertices; ++i) + { + //---------------------------------------------------------------- + // Coords + //---------------------------------------------------------------- + numRead = fread(&mBaseCoords[i], sizeof(float), 3, fp); + llendianswizzle(&mBaseCoords[i], sizeof(float), 3); + if (numRead != 3) + { + llerrs << "can't read Coordinates from " << fileName << llendl; + return FALSE; + } + } + + for (U16 i = 0; i < numVertices; ++i) + { + //---------------------------------------------------------------- + // Normals + //---------------------------------------------------------------- + numRead = fread(&mBaseNormals[i], sizeof(float), 3, fp); + llendianswizzle(&mBaseNormals[i], sizeof(float), 3); + if (numRead != 3) + { + llerrs << " can't read Normals from " << fileName << llendl; + return FALSE; + } + } + + for (U16 i = 0; i < numVertices; ++i) + { + //---------------------------------------------------------------- + // Binormals + //---------------------------------------------------------------- + numRead = fread(&mBaseBinormals[i], sizeof(float), 3, fp); + llendianswizzle(&mBaseBinormals[i], sizeof(float), 3); + if (numRead != 3) + { + llerrs << " can't read Binormals from " << fileName << llendl; + return FALSE; + } + } //---------------------------------------------------------------- // TexCoords @@ -767,21 +778,28 @@ LLPolyMesh::LLPolyMesh(LLPolyMeshSharedData *shared_data, LLPolyMesh *reference_ { // Allocate memory without initializing every vector // NOTE: This makes asusmptions about the size of LLVector[234] - int nverts = mSharedData->mNumVertices; - int nfloats = nverts * (2*4 + 3*3 + 2 + 4); + S32 nverts = mSharedData->mNumVertices; + //make sure it's an even number of verts for alignment + nverts += nverts%2; + S32 nfloats = nverts * ( + 4 + //coords + 4 + //normals + 4 + //weights + 2 + //coords + 4 + //scaled normals + 4 + //binormals + 4); //scaled binormals + //use 16 byte aligned vertex data to make LLPolyMesh SSE friendly mVertexData = (F32*) ll_aligned_malloc_16(nfloats*4); - int offset = 0; - mCoords = (LLVector4*)(mVertexData + offset); offset += 4*nverts; - mNormals = (LLVector4*)(mVertexData + offset); offset += 4*nverts; - mClothingWeights = (LLVector4*)(mVertexData + offset); offset += 4*nverts; - mTexCoords = (LLVector2*)(mVertexData + offset); offset += 2*nverts; - - // these members don't need to be 16-byte aligned, but the first one might be - // read during an aligned memcpy of mTexCoords - mScaledNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; - mBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; - mScaledBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; + S32 offset = 0; + mCoords = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; + mNormals = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; + mClothingWeights = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; + mTexCoords = (LLVector2*)(mVertexData + offset); offset += 2*nverts; + mScaledNormals = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; + mBinormals = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; + mScaledBinormals = (LLVector4a*)(mVertexData + offset); offset += 4*nverts; initializeForMorph(); } } @@ -906,7 +924,7 @@ void LLPolyMesh::dumpDiagInfo() //----------------------------------------------------------------------------- // getWritableCoords() //----------------------------------------------------------------------------- -LLVector4 *LLPolyMesh::getWritableCoords() +LLVector4a *LLPolyMesh::getWritableCoords() { return mCoords; } @@ -914,7 +932,7 @@ LLVector4 *LLPolyMesh::getWritableCoords() //----------------------------------------------------------------------------- // getWritableNormals() //----------------------------------------------------------------------------- -LLVector4 *LLPolyMesh::getWritableNormals() +LLVector4a *LLPolyMesh::getWritableNormals() { return mNormals; } @@ -922,7 +940,7 @@ LLVector4 *LLPolyMesh::getWritableNormals() //----------------------------------------------------------------------------- // getWritableBinormals() //----------------------------------------------------------------------------- -LLVector3 *LLPolyMesh::getWritableBinormals() +LLVector4a *LLPolyMesh::getWritableBinormals() { return mBinormals; } @@ -931,7 +949,7 @@ LLVector3 *LLPolyMesh::getWritableBinormals() //----------------------------------------------------------------------------- // getWritableClothingWeights() //----------------------------------------------------------------------------- -LLVector4 *LLPolyMesh::getWritableClothingWeights() +LLVector4a *LLPolyMesh::getWritableClothingWeights() { return mClothingWeights; } @@ -947,7 +965,7 @@ LLVector2 *LLPolyMesh::getWritableTexCoords() //----------------------------------------------------------------------------- // getScaledNormals() //----------------------------------------------------------------------------- -LLVector3 *LLPolyMesh::getScaledNormals() +LLVector4a *LLPolyMesh::getScaledNormals() { return mScaledNormals; } @@ -955,7 +973,7 @@ LLVector3 *LLPolyMesh::getScaledNormals() //----------------------------------------------------------------------------- // getScaledBinormals() //----------------------------------------------------------------------------- -LLVector3 *LLPolyMesh::getScaledBinormals() +LLVector4a *LLPolyMesh::getScaledBinormals() { return mScaledBinormals; } @@ -966,17 +984,17 @@ LLVector3 *LLPolyMesh::getScaledBinormals() //----------------------------------------------------------------------------- void LLPolyMesh::initializeForMorph() { - for (U32 i = 0; i < mSharedData->mNumVertices; ++i) + LLVector4a::memcpyNonAliased16((F32*) mCoords, (F32*) mSharedData->mBaseCoords, sizeof(LLVector4a) * mSharedData->mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mScaledNormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mScaledBinormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) mSharedData->mTexCoords, sizeof(LLVector2) * (mSharedData->mNumVertices + mSharedData->mNumVertices%2)); + + for (U32 i = 0; i < mSharedData->mNumVertices; ++i) { - mCoords[i] = LLVector4(mSharedData->mBaseCoords[i]); - mNormals[i] = LLVector4(mSharedData->mBaseNormals[i]); + mClothingWeights[i].clear(); } - - memcpy(mScaledNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ - memcpy(mBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ - memcpy(mScaledBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ - memcpy(mTexCoords, mSharedData->mTexCoords, sizeof(LLVector2) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ - memset(mClothingWeights, 0, sizeof(LLVector4) * mSharedData->mNumVertices); } //----------------------------------------------------------------------------- @@ -1098,7 +1116,7 @@ BOOL LLPolySkeletalDistortionInfo::parseXml(LLXmlTreeNode* node) LLPolySkeletalDistortion::LLPolySkeletalDistortion(LLVOAvatar *avatarp) { mAvatar = avatarp; - mDefaultVec.setVec(0.001f, 0.001f, 0.001f); + mDefaultVec.splat(0.001f); } //----------------------------------------------------------------------------- @@ -1171,8 +1189,12 @@ BOOL LLPolySkeletalDistortion::setInfo(LLPolySkeletalDistortionInfo *info) //----------------------------------------------------------------------------- // apply() //----------------------------------------------------------------------------- +static LLFastTimer::DeclareTimer FTM_POLYSKELETAL_DISTORTION_APPLY("Skeletal Distortion"); + void LLPolySkeletalDistortion::apply( ESex avatar_sex ) { + LLFastTimer t(FTM_POLYSKELETAL_DISTORTION_APPLY); + F32 effective_weight = ( getSex() & avatar_sex ) ? mCurWeight : getDefaultWeight(); LLJoint* joint; @@ -1228,11 +1250,14 @@ LLPolyMorphData *clone_morph_param_direction(const LLPolyMorphData *src_data, { LLPolyMorphData* cloned_morph_data = new LLPolyMorphData(*src_data); cloned_morph_data->mName = name; + LLVector4a dir; + dir.load3(direction.mV); + for (U32 v=0; v < cloned_morph_data->mNumIndices; v++) { - cloned_morph_data->mCoords[v] = direction; - cloned_morph_data->mNormals[v] = LLVector3(0,0,0); - cloned_morph_data->mBinormals[v] = LLVector3(0,0,0); + cloned_morph_data->mCoords[v] = dir; + cloned_morph_data->mNormals[v].clear(); + cloned_morph_data->mBinormals[v].clear(); } return cloned_morph_data; } @@ -1243,17 +1268,27 @@ LLPolyMorphData *clone_morph_param_cleavage(const LLPolyMorphData *src_data, { LLPolyMorphData* cloned_morph_data = new LLPolyMorphData(*src_data); cloned_morph_data->mName = name; + + LLVector4a sc; + sc.splat(scale); + + LLVector4a nsc; + nsc.set(scale, -scale, scale, scale); + for (U32 v=0; v < cloned_morph_data->mNumIndices; v++) { - cloned_morph_data->mCoords[v] = src_data->mCoords[v]*scale; - cloned_morph_data->mNormals[v] = src_data->mNormals[v]*scale; - cloned_morph_data->mBinormals[v] = src_data->mBinormals[v]*scale; - if (cloned_morph_data->mCoords[v][1] < 0) - { - cloned_morph_data->mCoords[v][1] *= -1; - cloned_morph_data->mNormals[v][1] *= -1; - cloned_morph_data->mBinormals[v][1] *= -1; - } + if (cloned_morph_data->mCoords[v][1] < 0) + { + cloned_morph_data->mCoords[v].setMul(src_data->mCoords[v],nsc); + cloned_morph_data->mNormals[v].setMul(src_data->mNormals[v],nsc); + cloned_morph_data->mBinormals[v].setMul(src_data->mBinormals[v],nsc); + } + else + { + cloned_morph_data->mCoords[v].setMul(src_data->mCoords[v],sc); + cloned_morph_data->mNormals[v].setMul(src_data->mNormals[v], sc); + cloned_morph_data->mBinormals[v].setMul(src_data->mBinormals[v],sc); + } } return cloned_morph_data; } diff --git a/indra/newview/llpolymesh.h b/indra/newview/llpolymesh.h index ba2bf85570..ffb11a3f7e 100644 --- a/indra/newview/llpolymesh.h +++ b/indra/newview/llpolymesh.h @@ -73,9 +73,9 @@ private: // vertex data S32 mNumVertices; - LLVector3 *mBaseCoords; - LLVector3 *mBaseNormals; - LLVector3 *mBaseBinormals; + LLVector4a *mBaseCoords; + LLVector4a *mBaseNormals; + LLVector4a *mBaseBinormals; LLVector2 *mTexCoords; LLVector2 *mDetailTexCoords; F32 *mWeights; @@ -217,41 +217,41 @@ public: } // Get coords - const LLVector4 *getCoords() const{ + const LLVector4a *getCoords() const{ return mCoords; } // non const version - LLVector4 *getWritableCoords(); + LLVector4a *getWritableCoords(); // Get normals - const LLVector4 *getNormals() const{ + const LLVector4a *getNormals() const{ return mNormals; } // Get normals - const LLVector3 *getBinormals() const{ + const LLVector4a *getBinormals() const{ return mBinormals; } // Get base mesh normals - const LLVector3 *getBaseNormals() const{ + const LLVector4a *getBaseNormals() const{ llassert(mSharedData); return mSharedData->mBaseNormals; } // Get base mesh normals - const LLVector3 *getBaseBinormals() const{ + const LLVector4a *getBaseBinormals() const{ llassert(mSharedData); return mSharedData->mBaseBinormals; } // intermediate morphed normals and output normals - LLVector4 *getWritableNormals(); - LLVector3 *getScaledNormals(); + LLVector4a *getWritableNormals(); + LLVector4a *getScaledNormals(); - LLVector3 *getWritableBinormals(); - LLVector3 *getScaledBinormals(); + LLVector4a *getWritableBinormals(); + LLVector4a *getScaledBinormals(); // Get texCoords const LLVector2 *getTexCoords() const { @@ -275,9 +275,9 @@ public: F32 *getWritableWeights() const; - LLVector4 *getWritableClothingWeights(); + LLVector4a *getWritableClothingWeights(); - const LLVector4 *getClothingWeights() + const LLVector4a *getClothingWeights() { return mClothingWeights; } @@ -341,17 +341,17 @@ protected: // Single array of floats for allocation / deletion F32 *mVertexData; // deformed vertices (resulting from application of morph targets) - LLVector4 *mCoords; + LLVector4a *mCoords; // deformed normals (resulting from application of morph targets) - LLVector3 *mScaledNormals; + LLVector4a *mScaledNormals; // output normals (after normalization) - LLVector4 *mNormals; + LLVector4a *mNormals; // deformed binormals (resulting from application of morph targets) - LLVector3 *mScaledBinormals; + LLVector4a *mScaledBinormals; // output binormals (after normalization) - LLVector3 *mBinormals; + LLVector4a *mBinormals; // weight values that mark verts as clothing/skin - LLVector4 *mClothingWeights; + LLVector4a *mClothingWeights; // output texture coordinates LLVector2 *mTexCoords; @@ -419,17 +419,17 @@ public: // LLViewerVisualParam Virtual functions /*virtual*/ F32 getTotalDistortion() { return 0.1f; } - /*virtual*/ const LLVector3& getAvgDistortion() { return mDefaultVec; } + /*virtual*/ const LLVector4a& getAvgDistortion() { return mDefaultVec; } /*virtual*/ F32 getMaxDistortion() { return 0.1f; } - /*virtual*/ LLVector3 getVertexDistortion(S32 index, LLPolyMesh *poly_mesh){return LLVector3(0.001f, 0.001f, 0.001f);} - /*virtual*/ const LLVector3* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh){index = 0; poly_mesh = NULL; return &mDefaultVec;}; - /*virtual*/ const LLVector3* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh){index = 0; poly_mesh = NULL; return NULL;}; + /*virtual*/ LLVector4a getVertexDistortion(S32 index, LLPolyMesh *poly_mesh){return LLVector4a(0.001f, 0.001f, 0.001f);} + /*virtual*/ const LLVector4a* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh){index = 0; poly_mesh = NULL; return &mDefaultVec;}; + /*virtual*/ const LLVector4a* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh){index = 0; poly_mesh = NULL; return NULL;}; protected: typedef std::map joint_vec_map_t; joint_vec_map_t mJointScales; joint_vec_map_t mJointOffsets; - LLVector3 mDefaultVec; + LLVector4a mDefaultVec; // Backlink only; don't make this an LLPointer. LLVOAvatar *mAvatar; }; diff --git a/indra/newview/llpolymorph.cpp b/indra/newview/llpolymorph.cpp index cefd7df3fe..69109a9ab1 100644 --- a/indra/newview/llpolymorph.cpp +++ b/indra/newview/llpolymorph.cpp @@ -48,7 +48,7 @@ LLPolyMorphData::LLPolyMorphData(const std::string& morph_name) mNumIndices = 0; mCurrentIndex = 0; mTotalDistortion = 0.f; - mAvgDistortion.zeroVec(); + mAvgDistortion.clear(); mMaxDistortion = 0.f; mVertexIndices = NULL; mCoords = NULL; @@ -73,9 +73,9 @@ LLPolyMorphData::LLPolyMorphData(const LLPolyMorphData &rhs) : { const S32 numVertices = mNumIndices; - mCoords = new LLVector3[numVertices]; - mNormals = new LLVector3[numVertices]; - mBinormals = new LLVector3[numVertices]; + mCoords = new LLVector4a[numVertices]; + mNormals = new LLVector4a[numVertices]; + mBinormals = new LLVector4a[numVertices]; mTexCoords = new LLVector2[numVertices]; mVertexIndices = new U32[numVertices]; @@ -121,16 +121,16 @@ BOOL LLPolyMorphData::loadBinary(LLFILE *fp, LLPolyMeshSharedData *mesh) //------------------------------------------------------------------------- // allocate vertices //------------------------------------------------------------------------- - mCoords = new LLVector3[numVertices]; - mNormals = new LLVector3[numVertices]; - mBinormals = new LLVector3[numVertices]; + mCoords = new LLVector4a[numVertices]; + mNormals = new LLVector4a[numVertices]; + mBinormals = new LLVector4a[numVertices]; mTexCoords = new LLVector2[numVertices]; // Actually, we are allocating more space than we need for the skiplist mVertexIndices = new U32[numVertices]; mNumIndices = 0; mTotalDistortion = 0.f; mMaxDistortion = 0.f; - mAvgDistortion.zeroVec(); + mAvgDistortion.clear(); mMesh = mesh; //------------------------------------------------------------------------- @@ -152,36 +152,36 @@ BOOL LLPolyMorphData::loadBinary(LLFILE *fp, LLPolyMeshSharedData *mesh) } - numRead = fread(&mCoords[v].mV, sizeof(F32), 3, fp); - llendianswizzle(&mCoords[v].mV, sizeof(F32), 3); + numRead = fread(&mCoords[v], sizeof(F32), 3, fp); + llendianswizzle(&mCoords[v], sizeof(F32), 3); if (numRead != 3) { llwarns << "Can't read morph target vertex coordinates" << llendl; return FALSE; } - F32 magnitude = mCoords[v].magVec(); + F32 magnitude = mCoords[v].getLength3().getF32(); mTotalDistortion += magnitude; - mAvgDistortion.mV[VX] += fabs(mCoords[v].mV[VX]); - mAvgDistortion.mV[VY] += fabs(mCoords[v].mV[VY]); - mAvgDistortion.mV[VZ] += fabs(mCoords[v].mV[VZ]); + LLVector4a t; + t.setAbs(mCoords[v]); + mAvgDistortion.add(t); if (magnitude > mMaxDistortion) { mMaxDistortion = magnitude; } - numRead = fread(&mNormals[v].mV, sizeof(F32), 3, fp); - llendianswizzle(&mNormals[v].mV, sizeof(F32), 3); + numRead = fread(&mNormals[v], sizeof(F32), 3, fp); + llendianswizzle(&mNormals[v], sizeof(F32), 3); if (numRead != 3) { llwarns << "Can't read morph target normal" << llendl; return FALSE; } - numRead = fread(&mBinormals[v].mV, sizeof(F32), 3, fp); - llendianswizzle(&mBinormals[v].mV, sizeof(F32), 3); + numRead = fread(&mBinormals[v], sizeof(F32), 3, fp); + llendianswizzle(&mBinormals[v], sizeof(F32), 3); if (numRead != 3) { llwarns << "Can't read morph target binormal" << llendl; @@ -200,8 +200,8 @@ BOOL LLPolyMorphData::loadBinary(LLFILE *fp, LLPolyMeshSharedData *mesh) mNumIndices++; } - mAvgDistortion = mAvgDistortion * (1.f/(F32)mNumIndices); - mAvgDistortion.normVec(); + mAvgDistortion.mul(1.f/(F32)mNumIndices); + mAvgDistortion.normalize3fast(); return TRUE; } @@ -367,9 +367,9 @@ BOOL LLPolyMorphTarget::parseData(LLXmlTreeNode* node) //----------------------------------------------------------------------------- // getVertexDistortion() //----------------------------------------------------------------------------- -LLVector3 LLPolyMorphTarget::getVertexDistortion(S32 requested_index, LLPolyMesh *mesh) +LLVector4a LLPolyMorphTarget::getVertexDistortion(S32 requested_index, LLPolyMesh *mesh) { - if (!mMorphData || mMesh != mesh) return LLVector3::zero; + if (!mMorphData || mMesh != mesh) return LLVector4a::getZero(); for(U32 index = 0; index < mMorphData->mNumIndices; index++) { @@ -379,17 +379,17 @@ LLVector3 LLPolyMorphTarget::getVertexDistortion(S32 requested_index, LLPolyMesh } } - return LLVector3::zero; + return LLVector4a::getZero(); } //----------------------------------------------------------------------------- // getFirstDistortion() //----------------------------------------------------------------------------- -const LLVector3 *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) +const LLVector4a *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) { - if (!mMorphData) return &LLVector3::zero; + if (!mMorphData) return &LLVector4a::getZero(); - LLVector3* resultVec; + LLVector4a* resultVec; mMorphData->mCurrentIndex = 0; if (mMorphData->mNumIndices) { @@ -411,11 +411,11 @@ const LLVector3 *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh ** //----------------------------------------------------------------------------- // getNextDistortion() //----------------------------------------------------------------------------- -const LLVector3 *LLPolyMorphTarget::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) +const LLVector4a *LLPolyMorphTarget::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) { - if (!mMorphData) return &LLVector3::zero; + if (!mMorphData) return &LLVector4a::getZero(); - LLVector3* resultVec; + LLVector4a* resultVec; mMorphData->mCurrentIndex++; if (mMorphData->mCurrentIndex < mMorphData->mNumIndices) { @@ -451,7 +451,7 @@ F32 LLPolyMorphTarget::getTotalDistortion() //----------------------------------------------------------------------------- // getAvgDistortion() //----------------------------------------------------------------------------- -const LLVector3& LLPolyMorphTarget::getAvgDistortion() +const LLVector4a& LLPolyMorphTarget::getAvgDistortion() { if (mMorphData) { @@ -459,7 +459,7 @@ const LLVector3& LLPolyMorphTarget::getAvgDistortion() } else { - return LLVector3::zero; + return LLVector4a::getZero(); } } @@ -481,6 +481,8 @@ F32 LLPolyMorphTarget::getMaxDistortion() //----------------------------------------------------------------------------- // apply() //----------------------------------------------------------------------------- +static LLFastTimer::DeclareTimer FTM_APPLY_MORPH_TARGET("Apply Morph"); + void LLPolyMorphTarget::apply( ESex avatar_sex ) { if (!mMorphData || mNumMorphMasksPending > 0) @@ -488,6 +490,8 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) return; } + LLFastTimer t(FTM_APPLY_MORPH_TARGET); + mLastSex = avatar_sex; // Check for NaN condition (NaN is detected if a variable doesn't equal itself. @@ -508,15 +512,15 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) if (delta_weight != 0.f) { llassert(!mMesh->isLOD()); - LLVector4 *coords = mMesh->getWritableCoords(); + LLVector4a *coords = mMesh->getWritableCoords(); - LLVector3 *scaled_normals = mMesh->getScaledNormals(); - LLVector4 *normals = mMesh->getWritableNormals(); + LLVector4a *scaled_normals = mMesh->getScaledNormals(); + LLVector4a *normals = mMesh->getWritableNormals(); - LLVector3 *scaled_binormals = mMesh->getScaledBinormals(); - LLVector3 *binormals = mMesh->getWritableBinormals(); + LLVector4a *scaled_binormals = mMesh->getScaledBinormals(); + LLVector4a *binormals = mMesh->getWritableBinormals(); - LLVector4 *clothing_weights = mMesh->getWritableClothingWeights(); + LLVector4a *clothing_weights = mMesh->getWritableClothingWeights(); LLVector2 *tex_coords = mMesh->getWritableTexCoords(); F32 *maskWeightArray = (mVertMask) ? mVertMask->getMorphMaskWeights() : NULL; @@ -531,31 +535,38 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) maskWeight = maskWeightArray[vert_index_morph]; } - coords[vert_index_mesh] += LLVector4(mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight); + + LLVector4a pos = mMorphData->mCoords[vert_index_morph]; + pos.mul(delta_weight*maskWeight); + coords[vert_index_mesh].add(pos); if (getInfo()->mIsClothingMorph && clothing_weights) { - LLVector3 clothing_offset = mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight; - LLVector4* clothing_weight = &clothing_weights[vert_index_mesh]; - clothing_weight->mV[VX] += clothing_offset.mV[VX]; - clothing_weight->mV[VY] += clothing_offset.mV[VY]; - clothing_weight->mV[VZ] += clothing_offset.mV[VZ]; - clothing_weight->mV[VW] = maskWeight; + LLVector4a clothing_offset = mMorphData->mCoords[vert_index_morph]; + clothing_offset.mul(delta_weight * maskWeight); + LLVector4a* clothing_weight = &clothing_weights[vert_index_mesh]; + clothing_weight->add(clothing_offset); + clothing_weight->getF32ptr()[VW] = maskWeight; } // calculate new normals based on half angles - scaled_normals[vert_index_mesh] += mMorphData->mNormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR; - LLVector3 normalized_normal = scaled_normals[vert_index_mesh]; - normalized_normal.normVec(); - normals[vert_index_mesh] = LLVector4(normalized_normal); + LLVector4a norm = mMorphData->mNormals[vert_index_morph]; + norm.mul(delta_weight*maskWeight*NORMAL_SOFTEN_FACTOR); + scaled_normals[vert_index_mesh].add(norm); + norm = scaled_normals[vert_index_mesh]; + norm.normalize3fast(); + normals[vert_index_mesh] = norm; // calculate new binormals - scaled_binormals[vert_index_mesh] += mMorphData->mBinormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR; - LLVector3 tangent = scaled_binormals[vert_index_mesh] % normalized_normal; - LLVector3 normalized_binormal = normalized_normal % tangent; - normalized_binormal.normVec(); - binormals[vert_index_mesh] = normalized_binormal; - + LLVector4a binorm = mMorphData->mBinormals[vert_index_morph]; + binorm.mul(delta_weight*maskWeight*NORMAL_SOFTEN_FACTOR); + scaled_binormals[vert_index_mesh].add(binorm); + LLVector4a tangent; + tangent.setCross3(scaled_binormals[vert_index_mesh], norm); + LLVector4a& normalized_binormal = binormals[vert_index_mesh]; + normalized_binormal.setCross3(norm, tangent); + normalized_binormal.normalize3fast(); + tex_coords[vert_index_mesh] += mMorphData->mTexCoords[vert_index_morph] * delta_weight * maskWeight; } @@ -582,7 +593,7 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) //----------------------------------------------------------------------------- void LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S32 num_components, BOOL invert) { - LLVector4 *clothing_weights = getInfo()->mIsClothingMorph ? mMesh->getWritableClothingWeights() : NULL; + LLVector4a *clothing_weights = getInfo()->mIsClothingMorph ? mMesh->getWritableClothingWeights() : NULL; if (!mVertMask) { @@ -596,29 +607,47 @@ void LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S3 if (maskWeights) { - LLVector4 *coords = mMesh->getWritableCoords(); - LLVector3 *scaled_normals = mMesh->getScaledNormals(); - LLVector3 *scaled_binormals = mMesh->getScaledBinormals(); + LLVector4a *coords = mMesh->getWritableCoords(); + LLVector4a *scaled_normals = mMesh->getScaledNormals(); + LLVector4a *scaled_binormals = mMesh->getScaledBinormals(); LLVector2 *tex_coords = mMesh->getWritableTexCoords(); + LLVector4Logical clothing_mask; + clothing_mask.clear(); + clothing_mask.setElement<0>(); + clothing_mask.setElement<1>(); + clothing_mask.setElement<2>(); + + for(U32 vert = 0; vert < mMorphData->mNumIndices; vert++) { F32 lastMaskWeight = mLastWeight * maskWeights[vert]; S32 out_vert = mMorphData->mVertexIndices[vert]; // remove effect of existing masked morph - coords[out_vert] -= LLVector4(mMorphData->mCoords[vert]) * lastMaskWeight; - scaled_normals[out_vert] -= mMorphData->mNormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR; - scaled_binormals[out_vert] -= mMorphData->mBinormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR; + LLVector4a t; + t = mMorphData->mCoords[vert]; + t.mul(lastMaskWeight); + coords[out_vert].sub(t); + + t = mMorphData->mNormals[vert]; + t.mul(lastMaskWeight*NORMAL_SOFTEN_FACTOR); + scaled_normals[out_vert].sub(t); + + t = mMorphData->mBinormals[vert]; + t.mul(lastMaskWeight*NORMAL_SOFTEN_FACTOR); + scaled_binormals[out_vert].sub(t); + tex_coords[out_vert] -= mMorphData->mTexCoords[vert] * lastMaskWeight; if (clothing_weights) { - LLVector3 clothing_offset = mMorphData->mCoords[vert] * lastMaskWeight; - LLVector4* clothing_weight = &clothing_weights[out_vert]; - clothing_weight->mV[VX] -= clothing_offset.mV[VX]; - clothing_weight->mV[VY] -= clothing_offset.mV[VY]; - clothing_weight->mV[VZ] -= clothing_offset.mV[VZ]; + LLVector4a clothing_offset = mMorphData->mCoords[vert]; + clothing_offset.mul(lastMaskWeight); + LLVector4a* clothing_weight = &clothing_weights[out_vert]; + LLVector4a t; + t.setSub(*clothing_weight, clothing_offset); + clothing_weight->setSelectWithMask(clothing_mask, clothing_offset, *clothing_weight); } } } @@ -654,7 +683,7 @@ LLPolyVertexMask::~LLPolyVertexMask() //----------------------------------------------------------------------------- // generateMask() //----------------------------------------------------------------------------- -void LLPolyVertexMask::generateMask(U8 *maskTextureData, S32 width, S32 height, S32 num_components, BOOL invert, LLVector4 *clothing_weights) +void LLPolyVertexMask::generateMask(U8 *maskTextureData, S32 width, S32 height, S32 num_components, BOOL invert, LLVector4a *clothing_weights) { // RN debug output that uses Image Debugger (http://www.cs.unc.edu/~baxter/projects/imdebug/) // BOOL debugImg = FALSE; @@ -698,7 +727,7 @@ void LLPolyVertexMask::generateMask(U8 *maskTextureData, S32 width, S32 height, if (clothing_weights) { - clothing_weights[vertIndex].mV[VW] = mWeights[index]; + clothing_weights[vertIndex].getF32ptr()[VW] = mWeights[index]; } } mWeightsGenerated = TRUE; diff --git a/indra/newview/llpolymorph.h b/indra/newview/llpolymorph.h index 8a024f2e9e..46e23b7792 100644 --- a/indra/newview/llpolymorph.h +++ b/indra/newview/llpolymorph.h @@ -58,14 +58,14 @@ public: U32 mNumIndices; U32* mVertexIndices; U32 mCurrentIndex; - LLVector3* mCoords; - LLVector3* mNormals; - LLVector3* mBinormals; + LLVector4a* mCoords; + LLVector4a* mNormals; + LLVector4a* mBinormals; LLVector2* mTexCoords; F32 mTotalDistortion; // vertex distortion summed over entire morph F32 mMaxDistortion; // maximum single vertex distortion in a given morph - LLVector3 mAvgDistortion; // average vertex distortion, to infer directionality of the morph + LLVector4a mAvgDistortion; // average vertex distortion, to infer directionality of the morph LLPolyMeshSharedData* mMesh; }; @@ -78,7 +78,7 @@ public: LLPolyVertexMask(LLPolyMorphData* morph_data); ~LLPolyVertexMask(); - void generateMask(U8 *maskData, S32 width, S32 height, S32 num_components, BOOL invert, LLVector4 *clothing_weights); + void generateMask(U8 *maskData, S32 width, S32 height, S32 num_components, BOOL invert, LLVector4a *clothing_weights); F32* getMorphMaskWeights(); @@ -157,11 +157,11 @@ public: // LLViewerVisualParam Virtual functions /*virtual*/ F32 getTotalDistortion(); - /*virtual*/ const LLVector3& getAvgDistortion(); + /*virtual*/ const LLVector4a& getAvgDistortion(); /*virtual*/ F32 getMaxDistortion(); - /*virtual*/ LLVector3 getVertexDistortion(S32 index, LLPolyMesh *poly_mesh); - /*virtual*/ const LLVector3* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh); - /*virtual*/ const LLVector3* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh); + /*virtual*/ LLVector4a getVertexDistortion(S32 index, LLPolyMesh *poly_mesh); + /*virtual*/ const LLVector4a* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh); + /*virtual*/ const LLVector4a* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh); void applyMask(U8 *maskData, S32 width, S32 height, S32 num_components, BOOL invert); void addPendingMorphMask() { mNumMorphMasksPending++; } diff --git a/indra/newview/lltexlayerparams.h b/indra/newview/lltexlayerparams.h index 74c22b0cdf..2c0da60b48 100644 --- a/indra/newview/lltexlayerparams.h +++ b/indra/newview/lltexlayerparams.h @@ -76,11 +76,11 @@ public: // LLViewerVisualParam Virtual functions /*virtual*/ F32 getTotalDistortion() { return 1.f; } - /*virtual*/ const LLVector3& getAvgDistortion() { return mAvgDistortionVec; } + /*virtual*/ const LLVector4a& getAvgDistortion() { return mAvgDistortionVec; } /*virtual*/ F32 getMaxDistortion() { return 3.f; } - /*virtual*/ LLVector3 getVertexDistortion(S32 index, LLPolyMesh *poly_mesh) { return LLVector3(1.f, 1.f, 1.f);} - /*virtual*/ const LLVector3* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) { index = 0; poly_mesh = NULL; return &mAvgDistortionVec;}; - /*virtual*/ const LLVector3* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) { index = 0; poly_mesh = NULL; return NULL;}; + /*virtual*/ LLVector4a getVertexDistortion(S32 index, LLPolyMesh *poly_mesh) { return LLVector4a(1.f, 1.f, 1.f);} + /*virtual*/ const LLVector4a* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) { index = 0; poly_mesh = NULL; return &mAvgDistortionVec;}; + /*virtual*/ const LLVector4a* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) { index = 0; poly_mesh = NULL; return NULL;}; // New functions BOOL render( S32 x, S32 y, S32 width, S32 height ); @@ -94,7 +94,7 @@ private: LLPointer mStaticImageRaw; BOOL mNeedsCreateTexture; BOOL mStaticImageInvalid; - LLVector3 mAvgDistortionVec; + LLVector4a mAvgDistortionVec; F32 mCachedEffectiveWeight; public: @@ -155,18 +155,18 @@ public: // LLViewerVisualParam Virtual functions /*virtual*/ F32 getTotalDistortion() { return 1.f; } - /*virtual*/ const LLVector3& getAvgDistortion() { return mAvgDistortionVec; } + /*virtual*/ const LLVector4a& getAvgDistortion() { return mAvgDistortionVec; } /*virtual*/ F32 getMaxDistortion() { return 3.f; } - /*virtual*/ LLVector3 getVertexDistortion(S32 index, LLPolyMesh *poly_mesh) { return LLVector3(1.f, 1.f, 1.f); } - /*virtual*/ const LLVector3* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) { index = 0; poly_mesh = NULL; return &mAvgDistortionVec;}; - /*virtual*/ const LLVector3* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) { index = 0; poly_mesh = NULL; return NULL;}; + /*virtual*/ LLVector4a getVertexDistortion(S32 index, LLPolyMesh *poly_mesh) { return LLVector4a(1.f, 1.f, 1.f); } + /*virtual*/ const LLVector4a* getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) { index = 0; poly_mesh = NULL; return &mAvgDistortionVec;}; + /*virtual*/ const LLVector4a* getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) { index = 0; poly_mesh = NULL; return NULL;}; // New functions LLColor4 getNetColor() const; protected: virtual void onGlobalColorChanged(bool upload_bake) {} private: - LLVector3 mAvgDistortionVec; + LLVector4a mAvgDistortionVec; }; class LLTexLayerParamColorInfo : public LLViewerVisualParamInfo diff --git a/indra/newview/llviewervisualparam.h b/indra/newview/llviewervisualparam.h index dd7751acd7..3bc95cbfbf 100644 --- a/indra/newview/llviewervisualparam.h +++ b/indra/newview/llviewervisualparam.h @@ -83,11 +83,11 @@ public: // New Virtual functions virtual F32 getTotalDistortion() = 0; - virtual const LLVector3& getAvgDistortion() = 0; + virtual const LLVector4a& getAvgDistortion() = 0; virtual F32 getMaxDistortion() = 0; - virtual LLVector3 getVertexDistortion(S32 index, LLPolyMesh *mesh) = 0; - virtual const LLVector3* getFirstDistortion(U32 *index, LLPolyMesh **mesh) = 0; - virtual const LLVector3* getNextDistortion(U32 *index, LLPolyMesh **mesh) = 0; + virtual LLVector4a getVertexDistortion(S32 index, LLPolyMesh *mesh) = 0; + virtual const LLVector4a* getFirstDistortion(U32 *index, LLPolyMesh **mesh) = 0; + virtual const LLVector4a* getNextDistortion(U32 *index, LLPolyMesh **mesh) = 0; // interface methods F32 getDisplayOrder() const { return getInfo()->mEditGroupDisplayOrder; } -- cgit v1.3