From e68d5e248f73180def7c8928b32482347dd91de4 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 14 Dec 2011 16:18:19 -0500 Subject: SH-2789 WIP - add asserts to check object address alignment where needed --- indra/llmath/llvector4a.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'indra/llmath/llvector4a.h') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509d..eada6ed1b7 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation; #include #include "llpreprocessor.h" +#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -90,6 +91,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary + ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) -- cgit v1.3 From e4a1b12e7a9c4bb8cbc2a1e41fddb8cec5a1b1e0 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Thu, 15 Dec 2011 12:48:07 -0500 Subject: SH-2789 WIP - build without tcmalloc, force alignment in various places --- indra/cmake/FindGooglePerfTools.cmake | 2 ++ indra/cmake/GooglePerfTools.cmake | 2 +- indra/llmath/llcamera.h | 12 ++++++------ indra/llmath/llmatrix3a.h | 2 +- indra/llmath/llmatrix4a.h | 2 +- indra/llmath/llplane.h | 4 +++- indra/llmath/llvector4a.h | 3 ++- indra/newview/CMakeLists.txt | 5 +++-- indra/newview/lldrawable.h | 7 ++++--- indra/newview/llspatialpartition.h | 12 ++++++------ indra/newview/llviewercamera.h | 4 +++- indra/newview/llvoavatar.cpp | 2 +- indra/newview/llvoavatar.h | 2 +- 13 files changed, 34 insertions(+), 25 deletions(-) mode change 100644 => 100755 indra/cmake/FindGooglePerfTools.cmake mode change 100644 => 100755 indra/cmake/GooglePerfTools.cmake mode change 100644 => 100755 indra/llmath/llcamera.h mode change 100644 => 100755 indra/llmath/llmatrix4a.h mode change 100644 => 100755 indra/llmath/llplane.h mode change 100644 => 100755 indra/llmath/llvector4a.h mode change 100644 => 100755 indra/newview/CMakeLists.txt mode change 100644 => 100755 indra/newview/lldrawable.h mode change 100644 => 100755 indra/newview/llspatialpartition.h mode change 100644 => 100755 indra/newview/llviewercamera.h mode change 100644 => 100755 indra/newview/llvoavatar.cpp mode change 100644 => 100755 indra/newview/llvoavatar.h (limited to 'indra/llmath/llvector4a.h') diff --git a/indra/cmake/FindGooglePerfTools.cmake b/indra/cmake/FindGooglePerfTools.cmake old mode 100644 new mode 100755 index bb125d538e..1c785d3461 --- a/indra/cmake/FindGooglePerfTools.cmake +++ b/indra/cmake/FindGooglePerfTools.cmake @@ -64,3 +64,5 @@ MARK_AS_ADVANCED( PROFILER_LIBRARY GOOGLE_PERFTOOLS_INCLUDE_DIR ) + +SET(GOOGLE_PERFTOOLS_FOUND "NO") diff --git a/indra/cmake/GooglePerfTools.cmake b/indra/cmake/GooglePerfTools.cmake old mode 100644 new mode 100755 index d9f91193be..2e96912769 --- a/indra/cmake/GooglePerfTools.cmake +++ b/indra/cmake/GooglePerfTools.cmake @@ -29,7 +29,7 @@ if (GOOGLE_PERFTOOLS_FOUND) endif (GOOGLE_PERFTOOLS_FOUND) if (WINDOWS) - set(USE_GOOGLE_PERFTOOLS ON) + set(USE_GOOGLE_PERFTOOLS OFF) endif (WINDOWS) if (USE_GOOGLE_PERFTOOLS) diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h old mode 100644 new mode 100755 index ec67b91d05..0b591be622 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; // roll(), pitch(), yaw() // etc... - +LL_ALIGN_PREFIX(16) class LLCamera : public LLCoordFrame { @@ -108,7 +108,7 @@ public: }; private: - LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + LL_ALIGN_16(LLPlane mAgentPlanes[7]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP U8 mPlaneMask[8]; // 8 for alignment F32 mView; // angle between top and bottom frustum planes in radians. @@ -116,13 +116,13 @@ private: S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - LLPlane mLocalPlanes[4]; + LL_ALIGN_16(LLPlane mLocalPlanes[4]); F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LLPlane mWorldPlanes[PLANE_NUM]; - LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; + LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); + LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -208,7 +208,7 @@ protected: void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); void calculateWorldFrustumPlanes(); -}; +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index adb7e3389d..9916cfd2da 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public: protected: - LLVector4a mColumns[3]; + LL_ALIGN_16(LLVector4a mColumns[3]); }; diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h old mode 100644 new mode 100755 index 27cf5b79f6..c4cefdb4fa --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@ class LLMatrix4a { public: - LLVector4a mMatrix[4]; + LL_ALIGN_16(LLVector4a mMatrix[4]); inline void clear() { diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h old mode 100644 new mode 100755 index a611894721..3c32441b11 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,6 +36,8 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) + +LL_ALIGN_PREFIX(16) class LLPlane { public: @@ -94,7 +96,7 @@ public: private: LLVector4a mV; -}; +} LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h old mode 100644 new mode 100755 index eada6ed1b7..9de0e66774 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -47,6 +47,7 @@ class LLRotation; // LLVector3/LLVector4. ///////////////////////////////// +LL_ALIGN_PREFIX(16) class LLVector4a { public: @@ -315,7 +316,7 @@ public: private: LLQuad mQ; -}; +} LL_ALIGN_POSTFIX(16); inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { diff --git a/indra/newview/CMakeLists.txt b/indra/newview/CMakeLists.txt old mode 100644 new mode 100755 index 6b2fe1e45a..825cbbef8f --- a/indra/newview/CMakeLists.txt +++ b/indra/newview/CMakeLists.txt @@ -1509,7 +1509,8 @@ if (WINDOWS) PROPERTIES # *TODO -reenable this once we get server usage sorted out #LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:\"__tcmalloc\"" - LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:__tcmalloc" +# LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:__tcmalloc" + LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS" LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO" LINK_FLAGS_RELEASE "" ) @@ -1764,7 +1765,7 @@ target_link_libraries(${VIEWER_BINARY_NAME} ${CRYPTO_LIBRARIES} ${LLLOGIN_LIBRARIES} ${LLCONVEXDECOMP_LIBRARY} - ${TCMALLOC_LIBRARIES} +# ${TCMALLOC_LIBRARIES} ) if (USE_KDU) diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h old mode 100644 new mode 100755 index e268640a21..244b87b2e7 --- a/indra/newview/lldrawable.h +++ b/indra/newview/lldrawable.h @@ -59,6 +59,7 @@ class LLViewerTexture; const U32 SILHOUETTE_HIGHLIGHT = 0; // All data for new renderer goes into this class. +LL_ALIGN_PREFIX(16) class LLDrawable : public LLRefCount { public: @@ -280,8 +281,8 @@ public: } EDrawableFlags; private: //aligned members - LLVector4a mExtents[2]; - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mExtents[2]); + LL_ALIGN_16(LLVector4a mPositionGroup); public: LLXformMatrix mXform; @@ -322,7 +323,7 @@ private: static U32 sNumZombieDrawables; static LLDynamicArrayPtr > sDeadList; -}; +} LL_ALIGN_POSTFIX(16); inline LLFace* LLDrawable::getFace(const S32 i) const diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h old mode 100644 new mode 100755 index f0c8a372ee..cbd4507f84 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -372,12 +372,12 @@ public: V4_COUNT = 10 } eV4Index; - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children - LLVector4a mObjectExtents[2]; // extents (min, max) of objects in this node - LLVector4a mObjectBounds[2]; // bounding box (center, size) of objects in this node - LLVector4a mViewAngle; - LLVector4a mLastUpdateViewAngle; + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mObjectExtents[2]); // extents (min, max) of objects in this node + LL_ALIGN_16(LLVector4a mObjectBounds[2]); // bounding box (center, size) of objects in this node + LL_ALIGN_16(LLVector4a mViewAngle); + LL_ALIGN_16(LLVector4a mLastUpdateViewAngle); private: U32 mCurUpdatingTime ; diff --git a/indra/newview/llviewercamera.h b/indra/newview/llviewercamera.h old mode 100644 new mode 100755 index cc3395115b..7257297cfd --- a/indra/newview/llviewercamera.h +++ b/indra/newview/llviewercamera.h @@ -51,6 +51,7 @@ const BOOL NOT_FOR_SELECTION = FALSE; extern template class LLViewerCamera* LLSingleton::getInstance(); #endif +LL_ALIGN_PREFIX(16) class LLViewerCamera : public LLCamera, public LLSingleton { public: @@ -137,6 +138,7 @@ protected: S16 mZoomSubregion; public: -}; +} LL_ALIGN_POSTFIX(16); + #endif // LL_LLVIEWERCAMERA_H diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp old mode 100644 new mode 100755 index 4767ba2bed..ad5ea15bbb --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -2595,7 +2595,7 @@ void LLVOAvatar::idleUpdateMisc(bool detailed_update) if (isImpostor() && !mNeedsImpostorUpdate) { - LLVector4a ext[2]; + LL_ALIGN_16(LLVector4a ext[2]); F32 distance; LLVector3 angle; diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h old mode 100644 new mode 100755 index 59796370ae..82157436b9 --- a/indra/newview/llvoavatar.h +++ b/indra/newview/llvoavatar.h @@ -212,7 +212,7 @@ public: bool isBuilt() const { return mIsBuilt; } private: //aligned members - LLVector4a mImpostorExtents[2]; + LL_ALIGN_16(LLVector4a mImpostorExtents[2]); private: BOOL mSupportsAlphaLayers; // For backwards compatibility, TRUE for 1.23+ clients -- cgit v1.3 From 1435a8b9e6203911d2ebe9e3ba217f8eb20e3140 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 4 Jan 2012 15:21:23 -0500 Subject: SH-2789 WIP - stricter calling of memcpyNonAliased16 --- indra/llcommon/llmemory.cpp | 3 +++ indra/llcommon/llmemory.h | 12 ++++++------ indra/llmath/llvector4a.cpp | 3 --- indra/llmath/llvector4a.h | 1 + indra/llmath/tests/alignment_test.cpp | 6 ++++-- indra/newview/llfloatermodelpreview.cpp | 3 ++- indra/newview/llviewerjointmesh.cpp | 6 ++++-- 7 files changed, 20 insertions(+), 14 deletions(-) mode change 100644 => 100755 indra/llcommon/llmemory.cpp mode change 100644 => 100755 indra/newview/llviewerjointmesh.cpp (limited to 'indra/llmath/llvector4a.h') diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp old mode 100644 new mode 100755 index 22204e756a..afaf366668 --- a/indra/llcommon/llmemory.cpp +++ b/indra/llcommon/llmemory.cpp @@ -63,11 +63,14 @@ LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sM void ll_assert_aligned_func(uintptr_t ptr,U32 alignment) { +#ifdef SHOW_ASSERT + // Redundant, place to set breakpoints. if (ptr%alignment!=0) { llwarns << "alignment check failed" << llendl; } llassert(ptr%alignment==0); +#endif } //static diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 3eaf700bf1..1b54b56107 100755 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -83,7 +83,7 @@ inline void ll_aligned_free_16(void *p) inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). { #if defined(LL_WINDOWS) - return _mm_malloc(size, 32); + return _aligned_malloc(size, 32); #elif defined(LL_DARWIN) return ll_aligned_malloc( size, 32 ); #else @@ -98,7 +98,7 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi inline void ll_aligned_free_32(void *p) { #if defined(LL_WINDOWS) - _mm_free(p); + _aligned_free(p); #elif defined(LL_DARWIN) ll_aligned_free( p ); #else @@ -107,10 +107,12 @@ inline void ll_aligned_free_32(void *p) } #else // USE_TCMALLOC -// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals) +// ll_aligned_foo are noops now that we use tcmalloc everywhere +// (tcmalloc aligns automatically at appropriate intervals) #define ll_aligned_malloc( size, align ) malloc(size) #define ll_aligned_free( ptr ) free(ptr) #define ll_aligned_malloc_16 malloc +#define ll_aligned_realloc_16 realloc #define ll_aligned_free_16 free #define ll_aligned_malloc_32 malloc #define ll_aligned_free_32 free @@ -524,11 +526,9 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr) // LLSingleton moved to llsingleton.h -#define CHECK_ALIGNMENT - LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); -#ifdef CHECK_ALIGNMENT +#ifdef SHOW_ASSERT #define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) #else #define ll_assert_aligned(ptr,alignment) diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 480ccf4ed9..6edeb0fefe 100755 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,8 +41,6 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { -// memcpy((void*)dst,(const void*)src,bytes); -#if 1 assert(src != NULL); assert(dst != NULL); assert(bytes > 0); @@ -92,7 +90,6 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F dst += 4; src += 4; } -#endif } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 9de0e66774..0526793d3a 100755 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -84,6 +84,7 @@ public: } // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. + // Source and dest must be 16-byte aligned and size must be multiple of 16. static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); //////////////////////////////////// diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp index 51a7051e69..dc9b41957d 100755 --- a/indra/llmath/tests/alignment_test.cpp +++ b/indra/llmath/tests/alignment_test.cpp @@ -72,6 +72,10 @@ void alignment_test_object_t::test<1>() { align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a)); ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16)); + + align_ptr = ll_aligned_realloc_16(2*sizeof(MyVector4a)); + ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16)); + ll_aligned_free_16(align_ptr); align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a)); @@ -84,8 +88,6 @@ void alignment_test_object_t::test<1>() template<> template<> void alignment_test_object_t::test<2>() { - ensure("LLAlignment reality is broken: ", (1==1)); - MyVector4a vec1; ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16)); diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp index 64bdcccd9f..8aa3b95578 100755 --- a/indra/newview/llfloatermodelpreview.cpp +++ b/indra/newview/llfloatermodelpreview.cpp @@ -4770,7 +4770,8 @@ void LLModelPreview::genBuffers(S32 lod, bool include_skin_weights) if (vf.mTexCoords) { vb->getTexCoord0Strider(tc_strider); - LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32)); + S32 tex_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, tex_size); } if (vf.mNormals) diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp old mode 100644 new mode 100755 index 76f4e18c27..d604687678 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -731,8 +731,10 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w F32* vw = (F32*) vertex_weightsp.get(); F32* cw = (F32*) clothing_weightsp.get(); - LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32)); - LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32)); + S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size); + S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size); LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32)); } -- cgit v1.3