From 6eab9e63f6b4ce3b94878b0a9aa82aca5237ae2e Mon Sep 17 00:00:00 2001 From: Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> Date: Tue, 3 Mar 2026 19:49:38 +0200 Subject: #5084 Convert watchdog to a simpleton --- indra/newview/llappviewer.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'indra/newview/llappviewer.cpp') diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index bae9749772..20dba648fd 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -1240,6 +1240,7 @@ bool LLAppViewer::init() // TODO: consider moving proxy initialization here or LLCopocedureManager after proxy initialization, may be implement // some other protection to make sure we don't use network before initializng proxy + LLWatchdog::createInstance(); /*----------------------------------------------------------------------*/ // nat 2016-06-29 moved the following here from the former mainLoop(). mMainloopTimeout = new LLWatchdogTimeout("mainloop"); -- cgit v1.3 From 19690db372cbce8cafe7252a54682948c6afa28c Mon Sep 17 00:00:00 2001 From: Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> Date: Fri, 6 Mar 2026 03:32:23 +0200 Subject: #5084 Fix watchdog's simpleton init It's needed for initWindow() --- indra/newview/llappviewer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/newview/llappviewer.cpp') diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 20dba648fd..4bca656108 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -961,6 +961,7 @@ bool LLAppViewer::init() // Initialize event recorder LLViewerEventRecorder::createInstance(); + LLWatchdog::createInstance(); // // Initialize the window @@ -1240,7 +1241,6 @@ bool LLAppViewer::init() // TODO: consider moving proxy initialization here or LLCopocedureManager after proxy initialization, may be implement // some other protection to make sure we don't use network before initializng proxy - LLWatchdog::createInstance(); /*----------------------------------------------------------------------*/ // nat 2016-06-29 moved the following here from the former mainLoop(). mMainloopTimeout = new LLWatchdogTimeout("mainloop"); -- cgit v1.3 From fdd7e88b8b6ee98b212bb4d31de6c9f650cdf07d Mon Sep 17 00:00:00 2001 From: Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> Date: Fri, 6 Mar 2026 03:20:55 +0200 Subject: #5084 Improve watchdog's behavior --- indra/llcommon/llapp.h | 1 + indra/llcommon/llwatchdog.cpp | 68 +++++++++--- indra/llcommon/llwatchdog.h | 19 +++- indra/newview/llappviewer.cpp | 142 ++++++++++++++++++++----- indra/newview/llappviewer.h | 3 + indra/newview/llappviewerwin32.cpp | 50 ++++++++- indra/newview/llappviewerwin32.h | 1 + indra/newview/skins/default/xui/en/strings.xml | 3 + 8 files changed, 246 insertions(+), 41 deletions(-) (limited to 'indra/newview/llappviewer.cpp') diff --git a/indra/llcommon/llapp.h b/indra/llcommon/llapp.h index ce09c566a9..fef7dc80b3 100644 --- a/indra/llcommon/llapp.h +++ b/indra/llcommon/llapp.h @@ -285,6 +285,7 @@ public: #ifdef LL_WINDOWS virtual bool reportCrashToBugsplat(void* pExcepInfo /*EXCEPTION_POINTERS*/) { return false; } + virtual bool reportCustomToBugsplat(const std::string& desription) { return false; } #endif public: diff --git a/indra/llcommon/llwatchdog.cpp b/indra/llcommon/llwatchdog.cpp index d3242a6c96..1622aeb180 100644 --- a/indra/llcommon/llwatchdog.cpp +++ b/indra/llcommon/llwatchdog.cpp @@ -173,6 +173,17 @@ void LLWatchdog::add(LLWatchdogEntry* e) { lockThread(); mSuspects.insert(e); + + if (!mFrozeList.empty()) + { + mFrozeList.erase(e); + if (mFrozeList.empty()) + { + // Clear error marker file if there is no frozen threads, + // viewer is responsive again. + mClearMarkerFnc(); + } + } unlockThread(); } @@ -183,7 +194,12 @@ void LLWatchdog::remove(LLWatchdogEntry* e) unlockThread(); } -void LLWatchdog::init(func_t set_error_state_callback) +void LLWatchdog::init( + create_marker_func_t error_state_callback, + clear_marker_func_t clear_marker_callback, + report_func_t report_callback, + notify_func_t notify_callback, + bool crash_on_freeze) { if (!mSuspectsAccessMutex && !mTimer) { @@ -196,7 +212,11 @@ void LLWatchdog::init(func_t set_error_state_callback) // start needs to use the mSuspectsAccessMutex mTimer->start(); } - mCreateMarkerFnc = set_error_state_callback; + mCreateMarkerFnc = error_state_callback; + mClearMarkerFnc = clear_marker_callback; + mCrashReportFnc = report_callback; + mNotifyFnc = notify_callback; + mCrashOnFreeze = crash_on_freeze; } void LLWatchdog::cleanup() @@ -251,21 +271,45 @@ void LLWatchdog::run() mTimer->stop(); } - // Sets error marker file - mCreateMarkerFnc(); - // Todo1: Warn user? - // Todo2: We probably want to report even if 5 seconds passed, just not error 'yet'. std::string last_state = (*result)->getLastState(); - if (last_state.empty()) + std::string description = "Watchdog timer for thread " + (*result)->getThreadName() + " expired"; + if (!last_state.empty()) { - LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() - << " expired; assuming viewer is hung and crashing" << LL_ENDL; + description += " with state: " + last_state; + } + description += "; assuming viewer is hung and crashing"; + + if (!mCrashOnFreeze) + { + // Sets watchdog marker file + mCreateMarkerFnc(false); + // If it's mainloop and it somehow recovers, it will re-add itself + mSuspects.erase(*result); + mFrozeList.insert(*result); + LL_WARNS() << description << LL_ENDL; } else { - LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() - << " expired with state: " << last_state - << "; assuming viewer is hung and crashing" << LL_ENDL; + + if (!mCrashReportFnc(description)) + { + // Sets error marker file + mCreateMarkerFnc(true); + // If false is returned, then we failed to report the issue to bugsplat, + // instead, Notify user, then crash viewer. + // Todo: ask user if viewer should quit or wait? + mNotifyFnc(); + LL_ERRS() << description << LL_ENDL; + } + else + { + // Sets watchdog marker file + mCreateMarkerFnc(false); + // Already reported, don't report again. + // If it's mainloop and it somehow recovers, it will re-add itself + mSuspects.erase(result); + mFrozeList.insert(*result); + } } } } diff --git a/indra/llcommon/llwatchdog.h b/indra/llcommon/llwatchdog.h index 2100a90879..f138fbccb0 100644 --- a/indra/llcommon/llwatchdog.h +++ b/indra/llcommon/llwatchdog.h @@ -93,8 +93,16 @@ public: void add(LLWatchdogEntry* e); void remove(LLWatchdogEntry* e); - typedef std::function func_t; - void init(func_t set_error_state_callback); + typedef std::function create_marker_func_t; + typedef std::function clear_marker_func_t; + typedef std::function report_func_t; + typedef std::function notify_func_t; + void init( + create_marker_func_t error_state_callback, + clear_marker_func_t clear_marker_callback, + report_func_t report_callback, + notify_func_t notify_callback, + bool crash_on_freeze); void run(); void cleanup(); @@ -105,14 +113,19 @@ private: typedef std::set SuspectsRegistry; SuspectsRegistry mSuspects; + SuspectsRegistry mFrozeList; LLMutex* mSuspectsAccessMutex; LLWatchdogTimerThread* mTimer; U64 mLastClockCount; + bool mCrashOnFreeze; // At the moment watchdog expects app to set markers in mCreateMarkerFnc, // but technically can be used to set any error states or do some cleanup // or show warnings. - func_t mCreateMarkerFnc; + create_marker_func_t mCreateMarkerFnc; + clear_marker_func_t mClearMarkerFnc; + report_func_t mCrashReportFnc; + notify_func_t mNotifyFnc; }; #endif // LL_LLTHREADWATCHDOG_H diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 4bca656108..288da8ead8 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -377,6 +377,7 @@ const std::string MARKER_FILE_NAME("SecondLife.exec_marker"); const std::string START_MARKER_FILE_NAME("SecondLife.start_marker"); const std::string ERROR_MARKER_FILE_NAME("SecondLife.error_marker"); const std::string LOGOUT_MARKER_FILE_NAME("SecondLife.logout_marker"); +const std::string WATCHDOG_MARKER_FILE_NAME("SecondLife.watchdog_marker"); static std::string gLaunchFileOnQuit; // Used on Win32 for other apps to identify our window (eg, win_setup) @@ -3201,20 +3202,60 @@ bool LLAppViewer::initWindow() << " (setting = " << watchdog_enabled_setting << ")" << LL_ENDL; - if (use_watchdog) + // Watchdog reports to statistics via marker files, that is + // pointless without ability to write (!mSecondInstance) those files. + // If use_watchdog is set, watchdog also reports to bugspat. + if (use_watchdog || !mSecondInstance) { - LLWatchdog::getInstance()->init([]() - { - LLAppViewer* app = LLAppViewer::instance(); - if (app->logoutRequestSent()) + LLWatchdog::getInstance()->init( + [](bool final_marker) { - app->createErrorMarker(LAST_EXEC_LOGOUT_FROZE); - } - else + LLAppViewer* app = LLAppViewer::instance(); + // Without watchdog everything will be counted as + // either 'unknown' (no crash marker) or based of present crash marker + if (final_marker) + { + // watchdog is going to crash viewer, so crate a 'crash' marker + if (app->logoutRequestSent()) + { + app->createErrorMarker(LAST_EXEC_LOGOUT_FROZE); + } + else + { + app->createErrorMarker(LAST_EXEC_FROZE); + } + } + else + { + // not going to crash, just create a 'watchdog' marker + app->createWatchdogMarker(); + } + }, + []() { - app->createErrorMarker(LAST_EXEC_FROZE); - } - }); + LLAppViewer* app = LLAppViewer::instance(); + // in case process recovered from freeze, remove watchdog marker. + app->removeWatchdogMarker(); + }, + [](std::string &desc) + { +#if LL_WINDOWS && LL_BUGSPLAT + LLAppViewer* app = LLAppViewer::instance(); + app->writeDebugInfo(); + return app->reportCustomToBugsplat(desc); +#else + return false; +#endif + }, + []() + { + LLAppViewer* app = LLAppViewer::instance(); + app->sendLogoutRequest(); + // Might be better to ask user if user wants to terminate the app or wait. + OSMessageBox(LLTrans::getString("MBFreezeDetected"), LLTrans::getString("MBFatalError"), OSMB_OK); + }, + use_watchdog); + } LLNotificationsUI::LLNotificationManager::getInstance(); @@ -3997,13 +4038,8 @@ void LLAppViewer::processMarkerFiles() { // the file existed, is ours, and matched our version, so we can report on what it says LL_INFOS("MarkerFile") << "Exec marker '"<< mMarkerFileName << "' found; last exec crashed or froze" << LL_ENDL; -#if LL_WINDOWS && LL_BUGSPLAT - // bugsplat will set correct state in bugsplatSendLog - // Might be more accurate to rename this one into 'unknown' + // App terminated unexpectedly or froze, we don't know the cause yet. gLastExecEvent = LAST_EXEC_UNKNOWN; -#else - gLastExecEvent = LAST_EXEC_OTHER_CRASH; -#endif // LL_WINDOWS } else @@ -4056,23 +4092,29 @@ void LLAppViewer::processMarkerFiles() } LLAPRFile::remove(logout_marker_file); } - // and last refine based on whether or not a marker created during a non-llerr crash is found + // Refine based on whether or not a marker created during + // a crash is found or if wathdog caught a freeze. + // Bugsplat will set correct state in bugsplatSendLog. std::string error_marker_file = gDirUtilp->getExpandedFilename(LL_PATH_LOGS, ERROR_MARKER_FILE_NAME); + std::string watchdog_marker_file = gDirUtilp->getExpandedFilename(LL_PATH_LOGS, WATCHDOG_MARKER_FILE_NAME); if(LLAPRFile::isExist(error_marker_file, NULL, LL_APR_RB)) { S32 marker_code = getMarkerErrorCode(error_marker_file); if (marker_code >= 0) { - if (gLastExecEvent == LAST_EXEC_LOGOUT_FROZE) - { - gLastExecEvent = LAST_EXEC_LOGOUT_CRASH; - LL_INFOS("MarkerFile") << "Error marker '"<< error_marker_file << "' crashed, setting LastExecEvent to LOGOUT_CRASH" << LL_ENDL; - } - else if (marker_code > 0 && marker_code < (S32)LAST_EXEC_COUNT) + if (marker_code > 0 && marker_code < (S32)LAST_EXEC_COUNT) { + // If we have a code, it takes precendence gLastExecEvent = (eLastExecEvent)marker_code; LL_INFOS("MarkerFile") << "Error marker '"<< error_marker_file << "' crashed, setting LastExecEvent to " << gLastExecEvent << LL_ENDL; } + // if we have the marker, even without a code, it's a crash. + else if (gLastExecEvent == LAST_EXEC_LOGOUT_UNKNOWN + || gLastExecEvent == LAST_EXEC_LOGOUT_FROZE) + { + gLastExecEvent = LAST_EXEC_LOGOUT_CRASH; + LL_INFOS("MarkerFile") << "Error marker '" << error_marker_file << "' crashed, setting LastExecEvent to LOGOUT_CRASH" << LL_ENDL; + } else { gLastExecEvent = LAST_EXEC_OTHER_CRASH; @@ -4084,6 +4126,33 @@ void LLAppViewer::processMarkerFiles() LL_INFOS("MarkerFile") << "Error marker '"<< error_marker_file << "' marker found, but versions did not match" << LL_ENDL; } LLAPRFile::remove(error_marker_file); + if (LLAPRFile::isExist(watchdog_marker_file, NULL, LL_APR_RB)) + { + // If viewer crashed after a freeze was detected, + // crash still takes precendence. Just clear watchdog. + removeWatchdogMarker(); + } + } + else + { + // so only check watchdog marker if there is no error marker. + if (LLAPRFile::isExist(watchdog_marker_file, NULL, LL_APR_RB)) + { + if (LAST_EXEC_UNKNOWN == gLastExecEvent + || LAST_EXEC_LOGOUT_UNKNOWN == gLastExecEvent) + { + // watchdog marker gets created if we detect a freeze, + // so if viwer did not stop gracefully, and we know it wasn't a crash, + // we have no other info, check watchdog. + if (markerIsSameVersion(watchdog_marker_file)) + { + gLastExecEvent = LAST_EXEC_UNKNOWN == gLastExecEvent ? LAST_EXEC_FROZE : LAST_EXEC_LOGOUT_FROZE; + LL_INFOS("MarkerFile") << "Watchdog marker '" << watchdog_marker_file << "' found, setting LastExecEvent to FROZE" + << LL_ENDL; + } + } + removeWatchdogMarker(); + } } #if LL_DARWIN @@ -4128,6 +4197,7 @@ void LLAppViewer::removeMarkerFiles() { LL_WARNS("MarkerFile") << "logout marker '"<getExpandedFilename(LL_PATH_LOGS, WATCHDOG_MARKER_FILE_NAME); + + LLAPRFile file; + file.open(error_marker, LL_APR_WB); + if (file.getFileHandle()) + { + recordMarkerVersion(file); + file.close(); + } + } +} +void LLAppViewer::removeWatchdogMarker() const +{ + if (!mSecondInstance) + { + std::string error_marker_file = gDirUtilp->getExpandedFilename(LL_PATH_LOGS, WATCHDOG_MARKER_FILE_NAME); + LLFile::remove(error_marker_file); + } +} + void LLAppViewer::outOfMemorySoftQuit() { if (!mQuitRequested) diff --git a/indra/newview/llappviewer.h b/indra/newview/llappviewer.h index e1119419af..71033b6d3f 100644 --- a/indra/newview/llappviewer.h +++ b/indra/newview/llappviewer.h @@ -257,6 +257,9 @@ public: void createErrorMarker(eLastExecEvent error_code) const; bool errorMarkerExists() const; + void createWatchdogMarker() const; + void removeWatchdogMarker() const; + // Attempt a 'soft' quit with disconnect and saving of settings/cache. // Intended to be thread safe. // Good chance of viewer crashing either way, but better than alternatives. diff --git a/indra/newview/llappviewerwin32.cpp b/indra/newview/llappviewerwin32.cpp index 0620b625d9..45c8c369ca 100644 --- a/indra/newview/llappviewerwin32.cpp +++ b/indra/newview/llappviewerwin32.cpp @@ -114,6 +114,7 @@ namespace // MiniDmpSender pointer. As things stand, though, we must define an // actual function and store the pointer statically. static MiniDmpSender *sBugSplatSender = nullptr; + static std::string sBugsplatDesriptionField; bool bugsplatSendLog(UINT nCode, LPVOID lpVal1, LPVOID lpVal2) { @@ -150,8 +151,21 @@ namespace WCSTR(gDirUtilp->getExpandedFilename(LL_PATH_PER_SL_ACCOUNT, "settings_per_account.xml"))); } - // LL_ERRS message, when there is one - sBugSplatSender->setDefaultUserDescription(WCSTR(LLError::getFatalMessage())); + if (!sBugsplatDesriptionField.empty()) + { + // Can be set by watchdog or other code that detects a problem + // and wants to add some context to the crash report. + // Will be visible in the BugSplat web UI. + sBugSplatSender->setDefaultUserDescription(WCSTR(LLError::getFatalMessage())); + // This type of crash is not nessesarily a crash, or final. + // Prepare for the next one. + sBugsplatDesriptionField.clear(); + } + else + { + // LL_ERRS message, when there is one + sBugSplatSender->setDefaultUserDescription(WCSTR(LLError::getFatalMessage())); + } sBugSplatSender->setAttribute(WCSTR(L"OS"), WCSTR(LLOSInfo::instance().getOSStringSimple())); // In case we ever stop using email for this sBugSplatSender->setAttribute(WCSTR(L"AppState"), WCSTR(LLStartUp::getStartupStateString())); @@ -833,6 +847,38 @@ bool LLAppViewerWin32::reportCrashToBugsplat(void* pExcepInfo) return false; } +#if defined(LL_BUGSPLAT) +static int reportCustomToBugsplatFilter(EXCEPTION_POINTERS* pExcepInfo) +{ + if (sBugSplatSender) + { + sBugSplatSender->createReport(pExcepInfo); + } + return EXCEPTION_EXECUTE_HANDLER; +} +#endif + +bool LLAppViewerWin32::reportCustomToBugsplat(const std::string &description) +{ +#if defined(LL_BUGSPLAT) + if (sBugSplatSender) + { + sBugsplatDesriptionField = description; + + __try + { + // Generate a custom exception code + RaiseException(0xE0000001, 0, 0, NULL); + } + __except (reportCustomToBugsplatFilter(GetExceptionInformation())) + { + } + return true; + } +#endif // LL_BUGSPLAT + return false; +} + bool LLAppViewerWin32::initWindow() { // This is a workaround/hotfix for a change in Windows 11 24H2 (and possibly later) diff --git a/indra/newview/llappviewerwin32.h b/indra/newview/llappviewerwin32.h index 3fad53ec72..5abcca1ce9 100644 --- a/indra/newview/llappviewerwin32.h +++ b/indra/newview/llappviewerwin32.h @@ -44,6 +44,7 @@ public: bool cleanup() override; bool reportCrashToBugsplat(void* pExcepInfo) override; + bool reportCustomToBugsplat(const std::string& desription) override; protected: bool initWindow() override; // Override to initialize the viewer's window. diff --git a/indra/newview/skins/default/xui/en/strings.xml b/indra/newview/skins/default/xui/en/strings.xml index 1860d38b0e..e8bd5cc51b 100644 --- a/indra/newview/skins/default/xui/en/strings.xml +++ b/indra/newview/skins/default/xui/en/strings.xml @@ -3007,6 +3007,9 @@ If this message persists, restart your computer. [APP_NAME] appears to have frozen or crashed on the previous run. Would you like to send a crash report? + + [APP_NAME] appears to have frozen. If this issue occurs regularly, please contact support at https://support.secondlife.com. + Notification [APP_NAME] is unable to detect DirectX 9.0b or greater. -- cgit v1.3