From 11afa09ea3f56c0e20eb195ae1520a88602ceaca Mon Sep 17 00:00:00 2001
From: Nat Goodspeed <nat@lindenlab.com>
Date: Fri, 22 Oct 2021 11:36:31 -0400
Subject: SL-16220: Add LL::ThreadPool class and a "General" instance.

ThreadPool bundles a WorkQueue with the specified number of worker threads to
service it. Each ThreadPool has a name that can be used to locate its
WorkQueue.

Each worker thread calls WorkQueue::runUntilClose().

ThreadPool listens on the "LLApp" LLEventPump for shutdown notification. On
receiving that, it closes its WorkQueue and then join()s each of its worker
threads for orderly shutdown.

Add a settings.xml entry "ThreadPoolSizes", the first LLSD-valued settings
entry to expect a map: pool name->size. The expectation is that usually code
instantiating a particular ThreadPool will have a default size in mind, but it
should check "ThreadPoolSizes" for a user override.

Make idle_startup()'s STATE_SEED_CAP_GRANTED state instantiate a "General"
ThreadPool. This is function-static for lazy initialization.

Eliminate LLMainLoopRepeater, which is completely unreferenced. Any potential
future use cases are better addressed by posting to the main loop's WorkQueue.

Eliminate llappviewer.cpp's private LLDeferredTaskList class, which
implemented LLAppViewer::addOnIdleCallback(). Make addOnIdleCallback() post
work to the main loop's WorkQueue instead.
---
 indra/llcommon/threadpool.cpp | 75 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 indra/llcommon/threadpool.cpp

(limited to 'indra/llcommon/threadpool.cpp')

diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp
new file mode 100644
index 0000000000..aa7d4179a2
--- /dev/null
+++ b/indra/llcommon/threadpool.cpp
@@ -0,0 +1,75 @@
+/**
+ * @file   threadpool.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-21
+ * @brief  Implementation of LL::ThreadPool, which bundles a WorkQueue with the worker threads that service it.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "threadpool.h"
+// STL headers
+// std headers
+// external library headers
+// other Linden headers
+#include "llerror.h"
+#include "llevents.h"
+#include "stringize.h"
+
+LL::ThreadPool::ThreadPool(const std::string& name, size_t threads):
+    mQueue(name),
+    mName("ThreadPool:" + name)
+{
+    for (size_t i = 0; i < threads; ++i)
+    {
+        std::string tname{ STRINGIZE(mName << ':' << (i+i) << '/' << threads) };
+        mThreads.emplace_back(tname, [this, tname](){ run(tname); });
+    }
+    // Listen on "LLApp", and when the app is shutting down, close the queue
+    // and join the workers.
+    LLEventPumps::instance().obtain("LLApp").listen(
+        mName,
+        [this](const LLSD& stat)
+        {
+            std::string status(stat["status"]);
+            if (status != "running")
+            {
+                // viewer is starting shutdown -- proclaim the end is nigh!
+                LL_DEBUGS("ThreadPool") << mName << " saw " << status << LL_ENDL;
+                close();
+            }
+            return false;
+        });
+}
+
+LL::ThreadPool::~ThreadPool()
+{
+    close();
+}
+
+void LL::ThreadPool::close()
+{
+    if (! mQueue.isClosed())
+    {
+        LL_DEBUGS("ThreadPool") << mName << " closing queue and joining threads" << LL_ENDL;
+        mQueue.close();
+        for (auto& pair: mThreads)
+        {
+            LL_DEBUGS("ThreadPool") << mName << " waiting on thread " << pair.first << LL_ENDL;
+            pair.second.join();
+        }
+        LL_DEBUGS("ThreadPool") << mName << " shutdown complete" << LL_ENDL;
+    }
+}
+
+void LL::ThreadPool::run(const std::string& name)
+{
+    LL_DEBUGS("ThreadPool") << name << " starting" << LL_ENDL;
+    mQueue.runUntilClose();
+    LL_DEBUGS("ThreadPool") << name << " stopping" << LL_ENDL;
+}
-- 
cgit v1.3


From d2763897f22e3d7789f97fe68000662ecd4a3548 Mon Sep 17 00:00:00 2001
From: Nat Goodspeed <nat@lindenlab.com>
Date: Fri, 22 Oct 2021 21:51:44 -0400
Subject: SL-16220: Fix thread name expression.

---
 indra/llcommon/threadpool.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'indra/llcommon/threadpool.cpp')

diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp
index aa7d4179a2..1899f9a20a 100644
--- a/indra/llcommon/threadpool.cpp
+++ b/indra/llcommon/threadpool.cpp
@@ -27,7 +27,7 @@ LL::ThreadPool::ThreadPool(const std::string& name, size_t threads):
 {
     for (size_t i = 0; i < threads; ++i)
     {
-        std::string tname{ STRINGIZE(mName << ':' << (i+i) << '/' << threads) };
+        std::string tname{ STRINGIZE(mName << ':' << (i+1) << '/' << threads) };
         mThreads.emplace_back(tname, [this, tname](){ run(tname); });
     }
     // Listen on "LLApp", and when the app is shutting down, close the queue
-- 
cgit v1.3


From 834e7ca088b5f417235327cd290b42459c733594 Mon Sep 17 00:00:00 2001
From: Nat Goodspeed <nat@lindenlab.com>
Date: Thu, 4 Nov 2021 17:18:57 -0400
Subject: SL-16202: Use large WorkQueue size limits for mainloop and General.

Give ThreadPool and WorkQueue the ability to override default
ThreadSafeSchedule capacity.

Instantiate "mainloop" WorkQueue and "General" ThreadPool with very large
capacity because we never want to have to block trying to push to either.
---
 indra/llcommon/threadpool.cpp | 4 ++--
 indra/llcommon/threadpool.h   | 2 +-
 indra/llcommon/workqueue.cpp  | 5 +++--
 indra/llcommon/workqueue.h    | 2 +-
 indra/newview/llappviewer.cpp | 4 +++-
 indra/newview/llstartup.cpp   | 4 +++-
 6 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'indra/llcommon/threadpool.cpp')

diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp
index 1899f9a20a..e4fa0eccf3 100644
--- a/indra/llcommon/threadpool.cpp
+++ b/indra/llcommon/threadpool.cpp
@@ -21,8 +21,8 @@
 #include "llevents.h"
 #include "stringize.h"
 
-LL::ThreadPool::ThreadPool(const std::string& name, size_t threads):
-    mQueue(name),
+LL::ThreadPool::ThreadPool(const std::string& name, size_t threads, size_t capacity):
+    mQueue(name, capacity),
     mName("ThreadPool:" + name)
 {
     for (size_t i = 0; i < threads; ++i)
diff --git a/indra/llcommon/threadpool.h b/indra/llcommon/threadpool.h
index 8f3c8514b5..6e3858508b 100644
--- a/indra/llcommon/threadpool.h
+++ b/indra/llcommon/threadpool.h
@@ -29,7 +29,7 @@ namespace LL
          * Pass ThreadPool a string name. This can be used to look up the
          * relevant WorkQueue.
          */
-        ThreadPool(const std::string& name, size_t threads=1);
+        ThreadPool(const std::string& name, size_t threads=1, size_t capacity=1024);
         ~ThreadPool();
         void close();
 
diff --git a/indra/llcommon/workqueue.cpp b/indra/llcommon/workqueue.cpp
index 9808757b0a..14ae4c4ab8 100644
--- a/indra/llcommon/workqueue.cpp
+++ b/indra/llcommon/workqueue.cpp
@@ -26,8 +26,9 @@
 using Mutex = LLCoros::Mutex;
 using Lock  = LLCoros::LockType;
 
-LL::WorkQueue::WorkQueue(const std::string& name):
-    super(makeName(name))
+LL::WorkQueue::WorkQueue(const std::string& name, size_t capacity):
+    super(makeName(name)),
+    mQueue(capacity)
 {
     // TODO: register for "LLApp" events so we can implicitly close() on
     // viewer shutdown.
diff --git a/indra/llcommon/workqueue.h b/indra/llcommon/workqueue.h
index d0e3f870fe..5987883829 100644
--- a/indra/llcommon/workqueue.h
+++ b/indra/llcommon/workqueue.h
@@ -54,7 +54,7 @@ namespace LL
          * You may omit the WorkQueue name, in which case a unique name is
          * synthesized; for practical purposes that makes it anonymous.
          */
-        WorkQueue(const std::string& name = std::string());
+        WorkQueue(const std::string& name = std::string(), size_t capacity=1024);
 
         /**
          * Since the point of WorkQueue is to pass work to some other worker
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index ea2e3a4007..02b4dd57f1 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -367,7 +367,9 @@ BOOL gLogoutInProgress = FALSE;
 
 BOOL gSimulateMemLeak = FALSE;
 
-WorkQueue gMainloopWork("mainloop");
+// We don't want anyone, especially threads working on the graphics pipeline,
+// to have to block due to this WorkQueue being full.
+WorkQueue gMainloopWork("mainloop", 1024*1024);
 
 ////////////////////////////////////////////////////////////
 // Internal globals... that should be removed.
diff --git a/indra/newview/llstartup.cpp b/indra/newview/llstartup.cpp
index 13e7fcb6e4..9a4149948c 100644
--- a/indra/newview/llstartup.cpp
+++ b/indra/newview/llstartup.cpp
@@ -313,7 +313,9 @@ void launchThreadPool()
                             << size << " threads" << LL_ENDL;
     // Use a function-static ThreadPool: static duration, but instantiated
     // only on demand.
-    static LL::ThreadPool pool("General", size);
+    // We don't want anyone, especially the main thread, to have to block
+    // due to this ThreadPool being full.
+    static LL::ThreadPool pool("General", size, 1024*1024);
 }
 
 void update_texture_fetch()
-- 
cgit v1.3


From ff5496239bffadaca111b1e4380a01447f85843a Mon Sep 17 00:00:00 2001
From: Nat Goodspeed <nat@lindenlab.com>
Date: Fri, 5 Nov 2021 12:33:31 -0400
Subject: SL-16202: Use WorkQueue::postTo() for texture create/post handshake.

That is, when LLViewerFetchedTexture::scheduleCreateTexture() wants to call
createTexture() on the LLImageGLThread, but postCreateTexture() on the main
thread, use the "mainloop" WorkQueue to set up the handshake.

Give ThreadPool a public virtual run() method so a subclass can override with
desired behavior. This necessitates a virtual destructor. Add accessors for
embedded WorkQueue (for post calls), ThreadPool name and width (in threads).

Allow LLSimpleton::createInstance() to forward arguments to the subject
constructor.

Make LLImageGLThread an LLSimpleton - that abstraction didn't yet exist at the
time LLImageGLThread was coded. Also derive from ThreadPool rather than
LLThread. Make it a single-thread "pool" with a very large queue capacity.
---
 indra/llcommon/llsingleton.h      | 20 +++++++++++---------
 indra/llcommon/threadpool.cpp     |  7 ++++++-
 indra/llcommon/threadpool.h       | 18 +++++++++++++++++-
 indra/llrender/llimagegl.cpp      | 22 +++++++++++-----------
 indra/llrender/llimagegl.h        | 11 ++++-------
 indra/newview/llviewertexture.cpp | 34 +++++++++++++++++++++++-----------
 indra/newview/llviewertexture.h   |  4 ++++
 7 files changed, 76 insertions(+), 40 deletions(-)

(limited to 'indra/llcommon/threadpool.cpp')

diff --git a/indra/llcommon/llsingleton.h b/indra/llcommon/llsingleton.h
index 24d01812c9..fdd5bdfea9 100644
--- a/indra/llcommon/llsingleton.h
+++ b/indra/llcommon/llsingleton.h
@@ -847,22 +847,24 @@ template<class T>
 class LLSimpleton
 {
 public:
-    static T* sInstance;
-    
-    static void createInstance() 
-    { 
+    template <typename... ARGS>
+    static void createInstance(ARGS&&... args)
+    {
         llassert(sInstance == nullptr);
-        sInstance = new T(); 
+        sInstance = new T(std::forward<ARGS>(args)...);
     }
-    
+
     static inline T* getInstance() { return sInstance; }
     static inline T& instance() { return *getInstance(); }
     static inline bool instanceExists() { return sInstance != nullptr; }
 
-    static void deleteSingleton() { 
-        delete sInstance; 
-        sInstance = nullptr; 
+    static void deleteSingleton() {
+        delete sInstance;
+        sInstance = nullptr;
     }
+
+private:
+    static T* sInstance;
 };
 
 template <class T>
diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp
index e4fa0eccf3..cf25cc838e 100644
--- a/indra/llcommon/threadpool.cpp
+++ b/indra/llcommon/threadpool.cpp
@@ -70,6 +70,11 @@ void LL::ThreadPool::close()
 void LL::ThreadPool::run(const std::string& name)
 {
     LL_DEBUGS("ThreadPool") << name << " starting" << LL_ENDL;
-    mQueue.runUntilClose();
+    run();
     LL_DEBUGS("ThreadPool") << name << " stopping" << LL_ENDL;
 }
+
+void LL::ThreadPool::run()
+{
+    mQueue.runUntilClose();
+}
diff --git a/indra/llcommon/threadpool.h b/indra/llcommon/threadpool.h
index 6e3858508b..1ca24aec58 100644
--- a/indra/llcommon/threadpool.h
+++ b/indra/llcommon/threadpool.h
@@ -30,9 +30,25 @@ namespace LL
          * relevant WorkQueue.
          */
         ThreadPool(const std::string& name, size_t threads=1, size_t capacity=1024);
-        ~ThreadPool();
+        virtual ~ThreadPool();
+
+        /**
+         * ThreadPool listens for application shutdown messages on the "LLApp"
+         * LLEventPump. Call close() to shut down this ThreadPool early.
+         */
         void close();
 
+        std::string getName() const { return mName; }
+        size_t getWidth() const { return mThreads.size(); }
+        /// obtain a non-const reference to the WorkQueue to post work to it
+        WorkQueue& getQueue() { return mQueue; }
+
+        /**
+         * Override run() if you need special processing. The default run()
+         * implementation simply calls WorkQueue::runUntilClose().
+         */
+        virtual void run();
+
     private:
         void run(const std::string& name);
 
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 71c48801ac..1b6920fe3b 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -172,24 +172,19 @@ BOOL is_little_endian()
 	return (*c == 0x78) ;
 }
 
-LLImageGLThread* LLImageGLThread::sInstance = nullptr;
-
 //static 
 void LLImageGL::initClass(LLWindow* window, S32 num_catagories, BOOL skip_analyze_alpha /* = false */)
 {
     LL_PROFILE_ZONE_SCOPED;
 	sSkipAnalyzeAlpha = skip_analyze_alpha;
-    LLImageGLThread::sInstance = new LLImageGLThread(window);
-    LLImageGLThread::sInstance->start();
+    LLImageGLThread::createInstance(window);
 }
 
 //static 
 void LLImageGL::cleanupClass() 
 {
     LL_PROFILE_ZONE_SCOPED;
-    LLImageGLThread::sInstance->mFunctionQueue.close();
-    delete LLImageGLThread::sInstance;
-    LLImageGLThread::sInstance = nullptr;
+    LLImageGLThread::deleteSingleton();
 }
 
 //static
@@ -1532,8 +1527,7 @@ BOOL LLImageGL::createGLTexture(S32 discard_level, const U8* data_in, BOOL data_
     }
 
     //if we're on the image loading thread, be sure to delete old_texname and update mTexName on the main thread
-    if (LLImageGLThread::sInstance != nullptr && 
-        LLThread::currentID() == LLImageGLThread::sInstance->getID())
+    if (! on_main_thread())
     {
         {
             LL_PROFILE_ZONE_NAMED("cglt - sync");
@@ -2257,7 +2251,11 @@ void LLImageGL::resetCurTexSizebar()
 */  
 
 LLImageGLThread::LLImageGLThread(LLWindow* window)
-    : LLThread("LLImageGL"), mWindow(window)
+    // We want exactly one thread, but a very large capacity: we never want
+    // anyone, especially inner-loop render code, to have to block on post()
+    // because we're full.
+    : ThreadPool("LLImageGL", 1, 1024*1024)
+    , mWindow(window)
 {
     mFinished = false;
 
@@ -2266,9 +2264,11 @@ LLImageGLThread::LLImageGLThread(LLWindow* window)
 
 void LLImageGLThread::run()
 {
+    // We must perform setup on this thread before actually servicing our
+    // WorkQueue, likewise cleanup afterwards.
     mWindow->makeContextCurrent(mContext);
     gGL.init();
-    mFunctionQueue.runUntilClose();
+    ThreadPool::run();
     gGL.shutdown();
     mWindow->destroySharedContext(mContext);
 }
diff --git a/indra/llrender/llimagegl.h b/indra/llrender/llimagegl.h
index b9de481aae..27496def1d 100644
--- a/indra/llrender/llimagegl.h
+++ b/indra/llrender/llimagegl.h
@@ -37,6 +37,7 @@
 #include "llunits.h"
 #include "llthreadsafequeue.h"
 #include "llrender.h"
+#include "threadpool.h"
 #include "workqueue.h"
 
 class LLTextureAtlas ;
@@ -307,7 +308,7 @@ public:
 
 };
 
-class LLImageGLThread : public LLThread
+class LLImageGLThread : public LLSimpleton<LLImageGLThread>, LL::ThreadPool
 {
 public:
     LLImageGLThread(LLWindow* window);
@@ -316,19 +317,15 @@ public:
     template <typename CALLABLE>
     bool post(CALLABLE&& func)
     {
-        return mFunctionQueue.postIfOpen(std::forward<CALLABLE>(func));
+        return getQueue().postIfOpen(std::forward<CALLABLE>(func));
     }
 
     void run() override;
 
-    // Work Queue for background thread
-    LL::WorkQueue mFunctionQueue;
-
+private:
     LLWindow* mWindow;
     void* mContext;
     LLAtomicBool mFinished;
-
-    static LLImageGLThread* sInstance;
 };
 
 
diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp
index 9f3819f7d1..498e4ef8bc 100644
--- a/indra/newview/llviewertexture.cpp
+++ b/indra/newview/llviewertexture.cpp
@@ -679,6 +679,9 @@ void LLViewerTexture::init(bool firstinit)
 	
 	mVolumeList[LLRender::LIGHT_TEX].clear();
 	mVolumeList[LLRender::SCULPT_TEX].clear();
+
+	mMainQueue	= LL::WorkQueue::getInstance("mainloop");
+	mImageQueue = LL::WorkQueue::getInstance("LLImageGL");
 }
 
 //virtual 
@@ -1622,17 +1625,26 @@ void LLViewerFetchedTexture::scheduleCreateTexture()
     {
         mNeedsCreateTexture = TRUE;
 #if LL_WINDOWS //flip to 0 to revert to single-threaded OpenGL texture uploads
-        if (!LLImageGLThread::sInstance->post([this]()
-            {
-                //actually create the texture on a background thread
-                createTexture();
-                LL::WorkQueue::getInstance("mainloop")->post([this]()
-                    {
-                        //finalize on main thread
-                        postCreateTexture();
-                        unref();
-                    });
-            }))
+        auto mainq = mMainQueue.lock();
+        if (mainq)
+        {
+            mainq->postTo(
+                mImageQueue,
+                // work to be done on LLImageGL worker thread
+                [this]()
+                {
+                    //actually create the texture on a background thread
+                    createTexture();
+                },
+                // callback to be run on main thread
+                [this]()
+                {
+                    //finalize on main thread
+                    postCreateTexture();
+                    unref();
+                });
+        }
+        else
 #endif
         {
             gTextureList.mCreateTextureList.insert(this);
diff --git a/indra/newview/llviewertexture.h b/indra/newview/llviewertexture.h
index f9f1bfef44..4cd4c7cd39 100644
--- a/indra/newview/llviewertexture.h
+++ b/indra/newview/llviewertexture.h
@@ -35,6 +35,7 @@
 #include "llrender.h"
 #include "llmetricperformancetester.h"
 #include "httpcommon.h"
+#include "workqueue.h"
 
 #include <map>
 #include <list>
@@ -213,6 +214,9 @@ protected:
 	//do not use LLPointer here.
 	LLViewerMediaTexture* mParcelMedia ;
 
+	LL::WorkQueue::weak_t mMainQueue;
+	LL::WorkQueue::weak_t mImageQueue;
+
 	static F32 sTexelPixelRatio;
 public:
 	static const U32 sCurrentFileVersion;	
-- 
cgit v1.3


From 9b0d8c7e629597fd8e6dfb91a6b8f625b34ab274 Mon Sep 17 00:00:00 2001
From: Runitai Linden <davep@lindenlab.com>
Date: Mon, 22 Nov 2021 18:42:56 -0600
Subject: SL-16094 More profile hooks for threading code, remove redundant
 wglCreateContextAttribs call

---
 indra/llcommon/llthreadsafequeue.h  | 18 ++++++++++++++++++
 indra/llcommon/threadpool.cpp       |  6 +++++-
 indra/llcommon/threadsafeschedule.h | 34 ++++++++++++++++++++++++++++++----
 indra/llrender/llimagegl.cpp        |  2 ++
 indra/llwindow/llwindowwin32.cpp    |  2 +-
 5 files changed, 56 insertions(+), 6 deletions(-)

(limited to 'indra/llcommon/threadpool.cpp')

diff --git a/indra/llcommon/llthreadsafequeue.h b/indra/llcommon/llthreadsafequeue.h
index 5c934791fe..2806506550 100644
--- a/indra/llcommon/llthreadsafequeue.h
+++ b/indra/llcommon/llthreadsafequeue.h
@@ -275,6 +275,7 @@ template <typename ElementT, typename QueueT>
 template <typename CALLABLE>
 bool LLThreadSafeQueue<ElementT, QueueT>::tryLock(CALLABLE&& callable)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock1(mLock, std::defer_lock);
     if (!lock1.try_lock())
         return false;
@@ -291,6 +292,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryLockUntil(
     const std::chrono::time_point<Clock, Duration>& until,
     CALLABLE&& callable)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock1(mLock, std::defer_lock);
     if (!lock1.try_lock_until(until))
         return false;
@@ -304,6 +306,7 @@ template <typename ElementT, typename QueueT>
 template <typename T>
 bool LLThreadSafeQueue<ElementT, QueueT>::push_(lock_t& lock, T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if (mStorage.size() >= mCapacity)
         return false;
 
@@ -319,6 +322,7 @@ template <typename ElementT, typename QueueT>
 template <typename T>
 bool LLThreadSafeQueue<ElementT, QueueT>::pushIfOpen(T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock1(mLock);
     while (true)
     {
@@ -341,6 +345,7 @@ template <typename ElementT, typename QueueT>
 template<typename T>
 void LLThreadSafeQueue<ElementT, QueueT>::push(T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if (! pushIfOpen(std::forward<T>(element)))
     {
         LLTHROW(LLThreadSafeQueueInterrupt());
@@ -352,6 +357,7 @@ template<typename ElementT, typename QueueT>
 template<typename T>
 bool LLThreadSafeQueue<ElementT, QueueT>::tryPush(T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return tryLock(
         [this, element=std::move(element)](lock_t& lock)
         {
@@ -368,6 +374,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryPushFor(
     const std::chrono::duration<Rep, Period>& timeout,
     T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     // Convert duration to time_point: passing the same timeout duration to
     // each of multiple calls is wrong.
     return tryPushUntil(std::chrono::steady_clock::now() + timeout,
@@ -381,6 +388,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryPushUntil(
     const std::chrono::time_point<Clock, Duration>& until,
     T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return tryLockUntil(
         until,
         [this, until, element=std::move(element)](lock_t& lock)
@@ -413,6 +421,7 @@ template <typename ElementT, typename QueueT>
 typename LLThreadSafeQueue<ElementT, QueueT>::pop_result
 LLThreadSafeQueue<ElementT, QueueT>::pop_(lock_t& lock, ElementT& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     // If mStorage is empty, there's no head element.
     if (mStorage.empty())
         return mClosed? DONE : EMPTY;
@@ -434,6 +443,7 @@ LLThreadSafeQueue<ElementT, QueueT>::pop_(lock_t& lock, ElementT& element)
 template<typename ElementT, typename QueueT>
 ElementT LLThreadSafeQueue<ElementT, QueueT>::pop(void)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock1(mLock);
     ElementT value;
     while (true)
@@ -462,6 +472,7 @@ ElementT LLThreadSafeQueue<ElementT, QueueT>::pop(void)
 template<typename ElementT, typename QueueT>
 bool LLThreadSafeQueue<ElementT, QueueT>::tryPop(ElementT & element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return tryLock(
         [this, &element](lock_t& lock)
         {
@@ -479,6 +490,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryPopFor(
     const std::chrono::duration<Rep, Period>& timeout,
     ElementT& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     // Convert duration to time_point: passing the same timeout duration to
     // each of multiple calls is wrong.
     return tryPopUntil(std::chrono::steady_clock::now() + timeout, element);
@@ -491,6 +503,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil(
     const std::chrono::time_point<Clock, Duration>& until,
     ElementT& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return tryLockUntil(
         until,
         [this, until, &element](lock_t& lock)
@@ -510,6 +523,7 @@ LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil_(
     const std::chrono::time_point<Clock, Duration>& until,
     ElementT& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     while (true)
     {
         pop_result popped = pop_(lock, element);
@@ -536,6 +550,7 @@ LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil_(
 template<typename ElementT, typename QueueT>
 size_t LLThreadSafeQueue<ElementT, QueueT>::size(void)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock(mLock);
     return mStorage.size();
 }
@@ -544,6 +559,7 @@ size_t LLThreadSafeQueue<ElementT, QueueT>::size(void)
 template<typename ElementT, typename QueueT>
 void LLThreadSafeQueue<ElementT, QueueT>::close()
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock(mLock);
     mClosed = true;
     lock.unlock();
@@ -557,6 +573,7 @@ void LLThreadSafeQueue<ElementT, QueueT>::close()
 template<typename ElementT, typename QueueT>
 bool LLThreadSafeQueue<ElementT, QueueT>::isClosed()
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock(mLock);
     return mClosed;
 }
@@ -565,6 +582,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::isClosed()
 template<typename ElementT, typename QueueT>
 bool LLThreadSafeQueue<ElementT, QueueT>::done()
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock(mLock);
     return mClosed && mStorage.empty();
 }
diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp
index cf25cc838e..06e0dc5bfc 100644
--- a/indra/llcommon/threadpool.cpp
+++ b/indra/llcommon/threadpool.cpp
@@ -28,7 +28,11 @@ LL::ThreadPool::ThreadPool(const std::string& name, size_t threads, size_t capac
     for (size_t i = 0; i < threads; ++i)
     {
         std::string tname{ STRINGIZE(mName << ':' << (i+1) << '/' << threads) };
-        mThreads.emplace_back(tname, [this, tname](){ run(tname); });
+        mThreads.emplace_back(tname, [this, tname]()
+            {
+                LL_PROFILER_SET_THREAD_NAME(tname.c_str());
+                run(tname);
+            });
     }
     // Listen on "LLApp", and when the app is shutting down, close the queue
     // and join the workers.
diff --git a/indra/llcommon/threadsafeschedule.h b/indra/llcommon/threadsafeschedule.h
index c8ad23532b..601681d550 100644
--- a/indra/llcommon/threadsafeschedule.h
+++ b/indra/llcommon/threadsafeschedule.h
@@ -98,12 +98,14 @@ namespace LL
         // we could minimize redundancy by breaking out a common base class...
         void push(const DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             push(tuple_cons(Clock::now(), tuple));
         }
 
         /// individually pass each component of the TimeTuple
         void push(const TimePoint& time, Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             push(TimeTuple(time, std::forward<Args>(args)...));
         }
 
@@ -114,6 +116,7 @@ namespace LL
         // and call that overload.
         void push(Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             push(Clock::now(), std::forward<Args>(args)...);
         }
 
@@ -124,18 +127,21 @@ namespace LL
         /// DataTuple with implicit now
         bool tryPush(const DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPush(tuple_cons(Clock::now(), tuple));
         }
 
         /// individually pass components
         bool tryPush(const TimePoint& time, Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPush(TimeTuple(time, std::forward<Args>(args)...));
         }
 
         /// individually pass components with implicit now
         bool tryPush(Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPush(Clock::now(), std::forward<Args>(args)...);
         }
 
@@ -148,6 +154,7 @@ namespace LL
         bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
                         const DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushFor(timeout, tuple_cons(Clock::now(), tuple));
         }
 
@@ -156,6 +163,7 @@ namespace LL
         bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
                         const TimePoint& time, Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushFor(TimeTuple(time, std::forward<Args>(args)...));
         }
 
@@ -164,6 +172,7 @@ namespace LL
         bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
                         Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushFor(Clock::now(), std::forward<Args>(args)...);
         }
 
@@ -176,6 +185,7 @@ namespace LL
         bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
                           const DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushUntil(until, tuple_cons(Clock::now(), tuple));
         }
 
@@ -184,6 +194,7 @@ namespace LL
         bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
                           const TimePoint& time, Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushUntil(until, TimeTuple(time, std::forward<Args>(args)...));
         }
 
@@ -192,6 +203,7 @@ namespace LL
         bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
                           Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushUntil(until, Clock::now(), std::forward<Args>(args)...);
         }
 
@@ -209,12 +221,14 @@ namespace LL
         // haven't yet jumped through those hoops.
         DataTuple pop()
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tuple_cdr(popWithTime());
         }
 
         /// pop TimeTuple by value
         TimeTuple popWithTime()
         {
+            LL_PROFILE_ZONE_SCOPED;
             lock_t lock(super::mLock);
             // We can't just sit around waiting forever, given that there may
             // be items in the queue that are not yet ready but will *become*
@@ -254,6 +268,7 @@ namespace LL
         /// tryPop(DataTuple&)
         bool tryPop(DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimeTuple tt;
             if (! super::tryPop(tt))
                 return false;
@@ -264,6 +279,7 @@ namespace LL
         /// for when Args has exactly one type
         bool tryPop(typename std::tuple_element<1, TimeTuple>::type& value)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimeTuple tt;
             if (! super::tryPop(tt))
                 return false;
@@ -275,6 +291,7 @@ namespace LL
         template <typename Rep, typename Period, typename Tuple>
         bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, Tuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             // It's important to use OUR tryPopUntil() implementation, rather
             // than delegating immediately to our base class.
             return tryPopUntil(Clock::now() + timeout, tuple);
@@ -285,6 +302,7 @@ namespace LL
         bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
                          TimeTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             // super::tryPopUntil() wakes up when an item becomes available or
             // we hit 'until', whichever comes first. Thing is, the current
             // head of the queue could become ready sooner than either of
@@ -304,20 +322,25 @@ namespace LL
 
         pop_result tryPopUntil_(lock_t& lock, const TimePoint& until, TimeTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimePoint adjusted = until;
             if (! super::mStorage.empty())
             {
+                LL_PROFILE_ZONE_NAMED("tpu - adjust");
                 // use whichever is earlier: the head item's timestamp, or
                 // the caller's limit
                 adjusted = min(std::get<0>(super::mStorage.front()), adjusted);
             }
             // now delegate to base-class tryPopUntil_()
             pop_result popped;
-            while ((popped = pop_result(super::tryPopUntil_(lock, adjusted, tuple))) == WAITING)
             {
-                // If super::tryPopUntil_() returns WAITING, it means there's
-                // a head item, but it's not yet time. But it's worth looping
-                // back to recheck.
+                LL_PROFILE_ZONE_NAMED("tpu - super");
+                while ((popped = pop_result(super::tryPopUntil_(lock, adjusted, tuple))) == WAITING)
+                {
+                    // If super::tryPopUntil_() returns WAITING, it means there's
+                    // a head item, but it's not yet time. But it's worth looping
+                    // back to recheck.
+                }
             }
             return popped;
         }
@@ -327,6 +350,7 @@ namespace LL
         bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
                          DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimeTuple tt;
             if (! tryPopUntil(until, tt))
                 return false;
@@ -339,6 +363,7 @@ namespace LL
         bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
                          typename std::tuple_element<1, TimeTuple>::type& value)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimeTuple tt;
             if (! tryPopUntil(until, tt))
                 return false;
@@ -362,6 +387,7 @@ namespace LL
         // considering whether to deliver the current head element
         bool canPop(const TimeTuple& head) const override
         {
+            LL_PROFILE_ZONE_SCOPED;
             // an item with a future timestamp isn't yet ready to pop
             // (should we add some slop for overhead?)
             return std::get<0>(head) <= Clock::now();
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 1b6920fe3b..e623baa653 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -2257,6 +2257,7 @@ LLImageGLThread::LLImageGLThread(LLWindow* window)
     : ThreadPool("LLImageGL", 1, 1024*1024)
     , mWindow(window)
 {
+    LL_PROFILE_ZONE_SCOPED;
     mFinished = false;
 
     mContext = mWindow->createSharedContext();
@@ -2264,6 +2265,7 @@ LLImageGLThread::LLImageGLThread(LLWindow* window)
 
 void LLImageGLThread::run()
 {
+    LL_PROFILE_ZONE_SCOPED;
     // We must perform setup on this thread before actually servicing our
     // WorkQueue, likewise cleanup afterwards.
     mWindow->makeContextCurrent(mContext);
diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp
index b845f75ce4..777117b7c8 100644
--- a/indra/llwindow/llwindowwin32.cpp
+++ b/indra/llwindow/llwindowwin32.cpp
@@ -1697,7 +1697,7 @@ void* LLWindowWin32::createSharedContext()
         0
     };
 
-    HGLRC rc = wglCreateContextAttribsARB(mhDC, mhRC, attribs);
+    HGLRC rc = 0;
 
     bool done = false;
     while (!done)
-- 
cgit v1.3


From 2b96f89c2a374d72c0a8bc28a7b06ad4db7eae6e Mon Sep 17 00:00:00 2001
From: Nat Goodspeed <nat@lindenlab.com>
Date: Tue, 23 Nov 2021 20:39:32 -0500
Subject: SL-16400: Add ThreadPool::start() method, and call it.

It's sometimes important to finish other initialization before launching the
threads in the ThreadPool, so make launching them an explicit step: the
constructor no longer starts any threads, and callers must invoke start(). In
particular, we were launching the LLImageGL texture thread before initializing
the GL context, resulting in all-gray textures.
---
 indra/llcommon/threadpool.cpp    | 10 +++++++---
 indra/llcommon/threadpool.h      |  9 +++++++++
 indra/llrender/llimagegl.cpp     |  5 +----
 indra/llwindow/llwindowwin32.cpp |  1 +
 4 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'indra/llcommon/threadpool.cpp')

diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp
index 06e0dc5bfc..ba914035e2 100644
--- a/indra/llcommon/threadpool.cpp
+++ b/indra/llcommon/threadpool.cpp
@@ -23,11 +23,15 @@
 
 LL::ThreadPool::ThreadPool(const std::string& name, size_t threads, size_t capacity):
     mQueue(name, capacity),
-    mName("ThreadPool:" + name)
+    mName("ThreadPool:" + name),
+    mThreadCount(threads)
+{}
+
+void LL::ThreadPool::start()
 {
-    for (size_t i = 0; i < threads; ++i)
+    for (size_t i = 0; i < mThreadCount; ++i)
     {
-        std::string tname{ STRINGIZE(mName << ':' << (i+1) << '/' << threads) };
+        std::string tname{ stringize(mName, ':', (i+1), '/', mThreadCount) };
         mThreads.emplace_back(tname, [this, tname]()
             {
                 LL_PROFILER_SET_THREAD_NAME(tname.c_str());
diff --git a/indra/llcommon/threadpool.h b/indra/llcommon/threadpool.h
index 1ca24aec58..b79c9b9090 100644
--- a/indra/llcommon/threadpool.h
+++ b/indra/llcommon/threadpool.h
@@ -32,6 +32,14 @@ namespace LL
         ThreadPool(const std::string& name, size_t threads=1, size_t capacity=1024);
         virtual ~ThreadPool();
 
+        /**
+         * Launch the ThreadPool. Until this call, a constructed ThreadPool
+         * launches no threads. That permits coders to derive from ThreadPool,
+         * or store it as a member of some other class, but refrain from
+         * launching it until all other construction is complete.
+         */
+        void start();
+
         /**
          * ThreadPool listens for application shutdown messages on the "LLApp"
          * LLEventPump. Call close() to shut down this ThreadPool early.
@@ -54,6 +62,7 @@ namespace LL
 
         WorkQueue mQueue;
         std::string mName;
+        size_t mThreadCount;
         std::vector<std::pair<std::string, std::thread>> mThreads;
     };
 
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index eda61d3c74..894eb8c773 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -2261,6 +2261,7 @@ LLImageGLThread::LLImageGLThread(LLWindow* window)
     mFinished = false;
 
     mContext = mWindow->createSharedContext();
+    ThreadPool::start();
 }
 
 void LLImageGLThread::run()
@@ -2268,10 +2269,6 @@ void LLImageGLThread::run()
     LL_PROFILE_ZONE_SCOPED;
     // We must perform setup on this thread before actually servicing our
     // WorkQueue, likewise cleanup afterwards.
-    while (mContext == nullptr)
-    { // HACK -- wait for mContext to be initialized since this thread will usually start before mContext is set
-        std::this_thread::sleep_for(std::chrono::milliseconds(1));
-    }
     mWindow->makeContextCurrent(mContext);
     gGL.init();
     ThreadPool::run();
diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp
index 51fd228619..af7b8d91f0 100644
--- a/indra/llwindow/llwindowwin32.cpp
+++ b/indra/llwindow/llwindowwin32.cpp
@@ -4495,6 +4495,7 @@ std::vector<std::string> LLWindowWin32::getDynamicFallbackFontList()
 inline LLWindowWin32::LLWindowWin32Thread::LLWindowWin32Thread()
     : ThreadPool("Window Thread", 1, MAX_QUEUE_SIZE)
 {
+    ThreadPool::start();
 }
 
 /**
-- 
cgit v1.3