diff options
| author | Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> | 2026-03-06 03:20:55 +0200 |
|---|---|---|
| committer | Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> | 2026-03-09 23:18:56 +0200 |
| commit | fdd7e88b8b6ee98b212bb4d31de6c9f650cdf07d (patch) | |
| tree | 72b54cb1bfeb145d0f52bc1fcd06e9ec1469404a /indra/llcommon | |
| parent | 19690db372cbce8cafe7252a54682948c6afa28c (diff) | |
#5084 Improve watchdog's behavior
Diffstat (limited to 'indra/llcommon')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | indra/llcommon/llapp.h | 1 |
| -rw-r--r-- | indra/llcommon/llwatchdog.cpp | 68 |
| -rw-r--r-- | indra/llcommon/llwatchdog.h | 19 |
3 files changed, 73 insertions, 15 deletions
diff --git a/indra/llcommon/llapp.h b/indra/llcommon/llapp.h index ce09c566a9..fef7dc80b3 100644 --- a/indra/llcommon/llapp.h +++ b/indra/llcommon/llapp.h @@ -285,6 +285,7 @@ public: #ifdef LL_WINDOWS virtual bool reportCrashToBugsplat(void* pExcepInfo /*EXCEPTION_POINTERS*/) { return false; } + virtual bool reportCustomToBugsplat(const std::string& desription) { return false; } #endif public: diff --git a/indra/llcommon/llwatchdog.cpp b/indra/llcommon/llwatchdog.cpp index d3242a6c96..1622aeb180 100644 --- a/indra/llcommon/llwatchdog.cpp +++ b/indra/llcommon/llwatchdog.cpp @@ -173,6 +173,17 @@ void LLWatchdog::add(LLWatchdogEntry* e) { lockThread(); mSuspects.insert(e); + + if (!mFrozeList.empty()) + { + mFrozeList.erase(e); + if (mFrozeList.empty()) + { + // Clear error marker file if there is no frozen threads, + // viewer is responsive again. + mClearMarkerFnc(); + } + } unlockThread(); } @@ -183,7 +194,12 @@ void LLWatchdog::remove(LLWatchdogEntry* e) unlockThread(); } -void LLWatchdog::init(func_t set_error_state_callback) +void LLWatchdog::init( + create_marker_func_t error_state_callback, + clear_marker_func_t clear_marker_callback, + report_func_t report_callback, + notify_func_t notify_callback, + bool crash_on_freeze) { if (!mSuspectsAccessMutex && !mTimer) { @@ -196,7 +212,11 @@ void LLWatchdog::init(func_t set_error_state_callback) // start needs to use the mSuspectsAccessMutex mTimer->start(); } - mCreateMarkerFnc = set_error_state_callback; + mCreateMarkerFnc = error_state_callback; + mClearMarkerFnc = clear_marker_callback; + mCrashReportFnc = report_callback; + mNotifyFnc = notify_callback; + mCrashOnFreeze = crash_on_freeze; } void LLWatchdog::cleanup() @@ -251,21 +271,45 @@ void LLWatchdog::run() mTimer->stop(); } - // Sets error marker file - mCreateMarkerFnc(); - // Todo1: Warn user? - // Todo2: We probably want to report even if 5 seconds passed, just not error 'yet'. 
std::string last_state = (*result)->getLastState(); - if (last_state.empty()) + std::string description = "Watchdog timer for thread " + (*result)->getThreadName() + " expired"; + if (!last_state.empty()) { - LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() - << " expired; assuming viewer is hung and crashing" << LL_ENDL; + description += " with state: " + last_state; + } + description += "; assuming viewer is hung and crashing"; + + if (!mCrashOnFreeze) + { + // Sets watchdog marker file + mCreateMarkerFnc(false); + // If it's mainloop and it somehow recovers, it will re-add itself + mSuspects.erase(*result); + mFrozeList.insert(*result); + LL_WARNS() << description << LL_ENDL; } else { - LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() - << " expired with state: " << last_state - << "; assuming viewer is hung and crashing" << LL_ENDL; + + if (!mCrashReportFnc(description)) + { + // Sets error marker file + mCreateMarkerFnc(true); + // If false is returned, then we failed to report the issue to bugsplat, + // instead, Notify user, then crash viewer. + // Todo: ask user if viewer should quit or wait? + mNotifyFnc(); + LL_ERRS() << description << LL_ENDL; + } + else + { + // Sets watchdog marker file + mCreateMarkerFnc(false); + // Already reported, don't report again. 
+ // If it's mainloop and it somehow recovers, it will re-add itself + mSuspects.erase(result); + mFrozeList.insert(*result); + } } } } diff --git a/indra/llcommon/llwatchdog.h b/indra/llcommon/llwatchdog.h index 2100a90879..f138fbccb0 100644 --- a/indra/llcommon/llwatchdog.h +++ b/indra/llcommon/llwatchdog.h @@ -93,8 +93,16 @@ public: void add(LLWatchdogEntry* e); void remove(LLWatchdogEntry* e); - typedef std::function<void()> func_t; - void init(func_t set_error_state_callback); + typedef std::function<void(bool)> create_marker_func_t; + typedef std::function<void()> clear_marker_func_t; + typedef std::function<bool(std::string&)> report_func_t; + typedef std::function<void()> notify_func_t; + void init( + create_marker_func_t error_state_callback, + clear_marker_func_t clear_marker_callback, + report_func_t report_callback, + notify_func_t notify_callback, + bool crash_on_freeze); void run(); void cleanup(); @@ -105,14 +113,19 @@ private: typedef std::set<LLWatchdogEntry*> SuspectsRegistry; SuspectsRegistry mSuspects; + SuspectsRegistry mFrozeList; LLMutex* mSuspectsAccessMutex; LLWatchdogTimerThread* mTimer; U64 mLastClockCount; + bool mCrashOnFreeze; // At the moment watchdog expects app to set markers in mCreateMarkerFnc, // but technically can be used to set any error states or do some cleanup // or show warnings. - func_t mCreateMarkerFnc; + create_marker_func_t mCreateMarkerFnc; + clear_marker_func_t mClearMarkerFnc; + report_func_t mCrashReportFnc; + notify_func_t mNotifyFnc; }; #endif // LL_LLTHREADWATCHDOG_H |
