summaryrefslogtreecommitdiff
path: root/indra/llcommon/llwatchdog.cpp
diff options
context:
space:
mode:
authorJonathan "Geenz" Goodman <geenz@lindenlab.com>2026-05-19 18:51:02 -0400
committerGitHub <noreply@github.com>2026-05-19 18:51:02 -0400
commit6d4c4c029ce43aa413266135829cd2bc00001890 (patch)
tree209ae880aba5ac777d524bdc557ed166b466347f /indra/llcommon/llwatchdog.cpp
parent5c500ccf407f0b5a0b253b98dd4bd3f33f643aba (diff)
parent115686ca0f962da9ec5dbcdbe54a4cb6c86302bb (diff)
Merge pull request #5369 from secondlife/release/2026.02
Release/2026.02
Diffstat (limited to 'indra/llcommon/llwatchdog.cpp')
-rw-r--r--indra/llcommon/llwatchdog.cpp74
1 files changed, 61 insertions, 13 deletions
diff --git a/indra/llcommon/llwatchdog.cpp b/indra/llcommon/llwatchdog.cpp
index fa240a9ed7..66b565c763 100644
--- a/indra/llcommon/llwatchdog.cpp
+++ b/indra/llcommon/llwatchdog.cpp
@@ -28,6 +28,7 @@
#include "linden_common.h"
#include "llwatchdog.h"
+#include "llmutex.h"
#include "llthread.h"
constexpr U32 WATCHDOG_SLEEP_TIME_USEC = 1000000U;
@@ -86,7 +87,7 @@ void LLWatchdogEntry::start()
void LLWatchdogEntry::stop()
{
// this can happen very late in the shutdown sequence
- if (!LLWatchdog::wasDeleted())
+ if (LLWatchdog::instanceExists())
{
LLWatchdog::getInstance()->remove(this);
}
@@ -172,6 +173,17 @@ void LLWatchdog::add(LLWatchdogEntry* e)
{
lockThread();
mSuspects.insert(e);
+
+ if (!mFrozeList.empty())
+ {
+ mFrozeList.erase(e);
+ if (mFrozeList.empty())
+ {
+ // Clear error marker file if there is no frozen threads,
+ // viewer is responsive again.
+ mClearMarkerFnc();
+ }
+ }
unlockThread();
}
@@ -179,10 +191,16 @@ void LLWatchdog::remove(LLWatchdogEntry* e)
{
lockThread();
mSuspects.erase(e);
+ mFrozeList.erase(e);
unlockThread();
}
-void LLWatchdog::init(func_t set_error_state_callback)
+void LLWatchdog::init(
+ create_marker_func_t error_state_callback,
+ clear_marker_func_t clear_marker_callback,
+ report_func_t report_callback,
+ notify_func_t notify_callback,
+ bool crash_on_freeze)
{
if (!mSuspectsAccessMutex && !mTimer)
{
@@ -195,7 +213,11 @@ void LLWatchdog::init(func_t set_error_state_callback)
// start needs to use the mSuspectsAccessMutex
mTimer->start();
}
- mCreateMarkerFnc = set_error_state_callback;
+ mCreateMarkerFnc = error_state_callback;
+ mClearMarkerFnc = clear_marker_callback;
+ mCrashReportFnc = report_callback;
+ mNotifyFnc = notify_callback;
+ mCrashOnFreeze = crash_on_freeze;
}
void LLWatchdog::cleanup()
@@ -250,21 +272,47 @@ void LLWatchdog::run()
mTimer->stop();
}
- // Sets error marker file
- mCreateMarkerFnc();
- // Todo1: Warn user?
- // Todo2: We probably want to report even if 5 seconds passed, just not error 'yet'.
std::string last_state = (*result)->getLastState();
- if (last_state.empty())
+ std::string description = "Watchdog timer for thread " + (*result)->getThreadName() + " expired";
+ if (!last_state.empty())
{
- LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName()
- << " expired; assuming viewer is hung and crashing" << LL_ENDL;
+ description += " with state: " + last_state;
+ }
+ description += "; assuming viewer is hung and crashing";
+
+ if (!mCrashOnFreeze)
+ {
+ // Sets watchdog marker file
+ mCreateMarkerFnc(false);
+ // If it's mainloop and it somehow recovers, it will re-add itself
+ LLWatchdogEntry* froze_entry = *result;
+ mSuspects.erase(result);
+ mFrozeList.insert(froze_entry);
+ LL_WARNS() << description << LL_ENDL;
}
else
{
- LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName()
- << " expired with state: " << last_state
- << "; assuming viewer is hung and crashing" << LL_ENDL;
+
+ if (!mCrashReportFnc(description))
+ {
+ // Sets error marker file
+ mCreateMarkerFnc(true);
+ // If false is returned, then we failed to report the issue to bugsplat,
+ // instead, Notify user, then crash viewer.
+ // Todo: ask user if viewer should quit or wait?
+ mNotifyFnc();
+ LL_ERRS() << description << LL_ENDL;
+ }
+ else
+ {
+ // Sets watchdog marker file
+ mCreateMarkerFnc(false);
+ // Already reported, don't report again.
+ // If it's mainloop and it somehow recovers, it will re-add itself
+ LLWatchdogEntry* froze_entry = *result;
+ mSuspects.erase(result);
+ mFrozeList.insert(froze_entry);
+ }
}
}
}