diff options
| author | Jonathan "Geenz" Goodman <geenz@lindenlab.com> | 2026-05-19 18:51:02 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-05-19 18:51:02 -0400 |
| commit | 6d4c4c029ce43aa413266135829cd2bc00001890 (patch) | |
| tree | 209ae880aba5ac777d524bdc557ed166b466347f /indra/llcommon/llwatchdog.cpp | |
| parent | 5c500ccf407f0b5a0b253b98dd4bd3f33f643aba (diff) | |
| parent | 115686ca0f962da9ec5dbcdbe54a4cb6c86302bb (diff) | |
Merge pull request #5369 from secondlife/release/2026.02
Release/2026.02
Diffstat (limited to 'indra/llcommon/llwatchdog.cpp')
| -rw-r--r-- | indra/llcommon/llwatchdog.cpp | 74 |
1 files changed, 61 insertions, 13 deletions
diff --git a/indra/llcommon/llwatchdog.cpp b/indra/llcommon/llwatchdog.cpp index fa240a9ed7..66b565c763 100644 --- a/indra/llcommon/llwatchdog.cpp +++ b/indra/llcommon/llwatchdog.cpp @@ -28,6 +28,7 @@ #include "linden_common.h" #include "llwatchdog.h" +#include "llmutex.h" #include "llthread.h" constexpr U32 WATCHDOG_SLEEP_TIME_USEC = 1000000U; @@ -86,7 +87,7 @@ void LLWatchdogEntry::start() void LLWatchdogEntry::stop() { // this can happen very late in the shutdown sequence - if (!LLWatchdog::wasDeleted()) + if (LLWatchdog::instanceExists()) { LLWatchdog::getInstance()->remove(this); } @@ -172,6 +173,17 @@ void LLWatchdog::add(LLWatchdogEntry* e) { lockThread(); mSuspects.insert(e); + + if (!mFrozeList.empty()) + { + mFrozeList.erase(e); + if (mFrozeList.empty()) + { + // Clear error marker file if there is no frozen threads, + // viewer is responsive again. + mClearMarkerFnc(); + } + } unlockThread(); } @@ -179,10 +191,16 @@ void LLWatchdog::remove(LLWatchdogEntry* e) { lockThread(); mSuspects.erase(e); + mFrozeList.erase(e); unlockThread(); } -void LLWatchdog::init(func_t set_error_state_callback) +void LLWatchdog::init( + create_marker_func_t error_state_callback, + clear_marker_func_t clear_marker_callback, + report_func_t report_callback, + notify_func_t notify_callback, + bool crash_on_freeze) { if (!mSuspectsAccessMutex && !mTimer) { @@ -195,7 +213,11 @@ void LLWatchdog::init(func_t set_error_state_callback) // start needs to use the mSuspectsAccessMutex mTimer->start(); } - mCreateMarkerFnc = set_error_state_callback; + mCreateMarkerFnc = error_state_callback; + mClearMarkerFnc = clear_marker_callback; + mCrashReportFnc = report_callback; + mNotifyFnc = notify_callback; + mCrashOnFreeze = crash_on_freeze; } void LLWatchdog::cleanup() @@ -250,21 +272,47 @@ void LLWatchdog::run() mTimer->stop(); } - // Sets error marker file - mCreateMarkerFnc(); - // Todo1: Warn user? - // Todo2: We probably want to report even if 5 seconds passed, just not error 'yet'. std::string last_state = (*result)->getLastState(); - if (last_state.empty()) + std::string description = "Watchdog timer for thread " + (*result)->getThreadName() + " expired"; + if (!last_state.empty()) { - LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() - << " expired; assuming viewer is hung and crashing" << LL_ENDL; + description += " with state: " + last_state; + } + description += "; assuming viewer is hung and crashing"; + + if (!mCrashOnFreeze) + { + // Sets watchdog marker file + mCreateMarkerFnc(false); + // If it's mainloop and it somehow recovers, it will re-add itself + LLWatchdogEntry* froze_entry = *result; + mSuspects.erase(result); + mFrozeList.insert(froze_entry); + LL_WARNS() << description << LL_ENDL; } else { - LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() - << " expired with state: " << last_state - << "; assuming viewer is hung and crashing" << LL_ENDL; + + if (!mCrashReportFnc(description)) + { + // Sets error marker file + mCreateMarkerFnc(true); + // If false is returned, then we failed to report the issue to bugsplat, + // instead, Notify user, then crash viewer. + // Todo: ask user if viewer should quit or wait? + mNotifyFnc(); + LL_ERRS() << description << LL_ENDL; + } + else + { + // Sets watchdog marker file + mCreateMarkerFnc(false); + // Already reported, don't report again. + // If it's mainloop and it somehow recovers, it will re-add itself + LLWatchdogEntry* froze_entry = *result; + mSuspects.erase(result); + mFrozeList.insert(froze_entry); + } } } } |
