aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/OpenSim/Framework/Monitoring/Watchdog.cs
diff options
context:
space:
mode:
Diffstat (limited to 'OpenSim/Framework/Monitoring/Watchdog.cs')
-rw-r--r--OpenSim/Framework/Monitoring/Watchdog.cs380
1 files changed, 380 insertions, 0 deletions
diff --git a/OpenSim/Framework/Monitoring/Watchdog.cs b/OpenSim/Framework/Monitoring/Watchdog.cs
new file mode 100644
index 0000000..a644fa5
--- /dev/null
+++ b/OpenSim/Framework/Monitoring/Watchdog.cs
@@ -0,0 +1,380 @@
1/*
2 * Copyright (c) Contributors, http://opensimulator.org/
3 * See CONTRIBUTORS.TXT for a full list of copyright holders.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the OpenSimulator Project nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28using System;
29using System.Collections.Generic;
30using System.Linq;
31using System.Threading;
32using log4net;
33
34namespace OpenSim.Framework.Monitoring
35{
36 /// <summary>
37 /// Manages launching threads and keeping watch over them for timeouts
38 /// </summary>
39 public static class Watchdog
40 {
// Logger for this class. typeof(Watchdog) names the same type the reflective
// MethodBase.GetCurrentMethod().DeclaringType lookup resolves to in this initializer.
private static readonly ILog m_log = LogManager.GetLogger(typeof(Watchdog));

/// <summary>
/// Interval, in milliseconds, between runs of the watchdog timer.
/// </summary>
public const double WATCHDOG_INTERVAL_MS = 2500d;

/// <summary>
/// Default number of milliseconds without a heartbeat before a thread is considered dead.
/// </summary>
public const int DEFAULT_WATCHDOG_TIMEOUT_MS = 5000;
48
/// <summary>
/// Bookkeeping record for a single thread tracked by the watchdog.
/// </summary>
[System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
public class ThreadWatchdogInfo
{
    /// <summary>
    /// The thread being tracked.
    /// </summary>
    public Thread Thread { get; private set; }

    /// <summary>
    /// Approximate tick when this thread was started.
    /// </summary>
    /// <remarks>
    /// Not terribly good since this quickly wraps around.
    /// </remarks>
    public int FirstTick { get; private set; }

    /// <summary>
    /// Last time this heartbeat update was invoked
    /// </summary>
    public int LastTick { get; set; }

    /// <summary>
    /// Number of milliseconds before we notify that the thread is having a problem.
    /// </summary>
    public int Timeout { get; set; }

    /// <summary>
    /// Is this thread considered timed out?
    /// </summary>
    public bool IsTimedOut { get; set; }

    /// <summary>
    /// Will this thread trigger the alarm function if it has timed out?
    /// </summary>
    public bool AlarmIfTimeout { get; set; }

    /// <summary>
    /// Method to execute if the alarm goes off. If null then no alarm method is fired.
    /// </summary>
    public Func<string> AlarmMethod { get; set; }

    /// <summary>
    /// Stat structure associated with this thread.
    /// </summary>
    /// <remarks>
    /// Null on instances created through the copy constructor, which does not carry the stat over.
    /// </remarks>
    public Stat Stat { get; set; }

    /// <summary>
    /// Creates the tracking record for a thread and registers a pull stat that reports
    /// the number of milliseconds since the thread's last heartbeat.
    /// </summary>
    /// <param name="thread">Thread to track.</param>
    /// <param name="timeout">Milliseconds without a heartbeat before the thread counts as timed out.</param>
    /// <param name="name">Name used for the stat and its description.</param>
    public ThreadWatchdogInfo(Thread thread, int timeout, string name)
    {
        Thread = thread;
        Timeout = timeout;
        FirstTick = Environment.TickCount & Int32.MaxValue;
        LastTick = FirstTick;

        Stat
            = new Stat(
                name,
                string.Format("Last update of thread {0}", name),
                "",
                "ms",
                "server",
                "thread",
                StatType.Pull,
                MeasuresOfInterest.None,
                // The parentheses matter: binary '-' binds tighter than '&', so without them
                // this evaluated TickCount & (Int32.MaxValue - LastTick) instead of the
                // elapsed time since the last heartbeat.
                stat => stat.Value = (Environment.TickCount & Int32.MaxValue) - LastTick,
                StatVerbosity.Debug);

        StatsManager.RegisterStat(Stat);
    }

    /// <summary>
    /// Creates a snapshot copy of another record.
    /// </summary>
    /// <remarks>
    /// The stat is deliberately not copied or re-registered; <see cref="Stat"/> stays null on the copy.
    /// </remarks>
    /// <param name="previousTwi">Record to copy the tracking state from.</param>
    public ThreadWatchdogInfo(ThreadWatchdogInfo previousTwi)
    {
        Thread = previousTwi.Thread;
        FirstTick = previousTwi.FirstTick;
        LastTick = previousTwi.LastTick;
        Timeout = previousTwi.Timeout;
        IsTimedOut = previousTwi.IsTimedOut;
        AlarmIfTimeout = previousTwi.AlarmIfTimeout;
        AlarmMethod = previousTwi.AlarmMethod;
    }

    /// <summary>
    /// Deregisters the stat associated with this record, if it has one.
    /// </summary>
    public void Cleanup()
    {
        // Copies made via the copy constructor never registered a stat, so there is nothing to remove.
        if (Stat != null)
            StatsManager.DeregisterStat(Stat);
    }
}
131
132 /// <summary>
133 /// This event is called whenever a tracked thread is
134 /// stopped or has not called UpdateThread() in time
135 /// </summary>
136 public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout;
137
/// <summary>
/// Gets or sets whether the watchdog timer is running.
/// </summary>
/// <remarks>
/// Assigning the value it already holds does nothing. When switching on,
/// LastWatchdogThreadTick is reset before the timer is enabled.
/// </remarks>
public static bool Enabled
{
    get
    {
        return m_enabled;
    }

    set
    {
        if (value != m_enabled)
        {
            m_enabled = value;

            // Set now so we don't get alerted on the first run
            if (m_enabled)
                LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

            m_watchdogTimer.Enabled = m_enabled;
        }
    }
}
162
// Backing field for the Enabled property.
private static bool m_enabled;

// Tracked threads keyed by managed thread ID. Writers and most readers lock on this
// dictionary; UpdateThread(int) reads it without taking the lock.
private static Dictionary<int, ThreadWatchdogInfo> m_threads;

// One-shot timer; WatchdogTimerElapsed restarts it at the end of each run.
private static System.Timers.Timer m_watchdogTimer;

/// <summary>
/// Masked tick count recorded the last time the watchdog ran (or was enabled).
/// </summary>
/// <remarks>
/// Should run every WATCHDOG_INTERVAL_MS
/// </remarks>
public static int LastWatchdogThreadTick { get; private set; }

/// <summary>
/// Creates the thread table and the (initially stopped) one-shot watchdog timer.
/// </summary>
static Watchdog()
{
    System.Timers.Timer timer = new System.Timers.Timer(WATCHDOG_INTERVAL_MS);
    timer.Elapsed += WatchdogTimerElapsed;
    timer.AutoReset = false;

    m_watchdogTimer = timer;
    m_threads = new Dictionary<int, ThreadWatchdogInfo>();
}
182
/// <summary>
/// Starts tracking a thread, keyed by its managed thread ID.
/// </summary>
/// <param name="info">Information about the thread.</param>
/// <param name="name">Name of the thread. Only used in the log message.</param>
/// <param name="log">If true then creation of thread is logged.</param>
/// <exception cref="ArgumentException">
/// A thread with the same managed thread ID is already being tracked.
/// </exception>
public static void AddThread(ThreadWatchdogInfo info, string name, bool log = true)
{
    int managedId = info.Thread.ManagedThreadId;

    if (log)
    {
        m_log.DebugFormat(
            "[WATCHDOG]: Started tracking thread {0}, ID {1}", name, managedId);
    }

    lock (m_threads)
    {
        m_threads.Add(managedId, info);
    }
}
198
/// <summary>
/// Records a heartbeat for the calling thread.
/// </summary>
public static void UpdateThread()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;
    UpdateThread(callerId);
}
206
/// <summary>
/// Stops watchdog tracking of the calling thread.
/// </summary>
/// <param name="log">If true then a successful removal is logged.</param>
/// <returns>
/// True if the calling thread was being tracked and has been removed,
/// otherwise false.
/// </returns>
public static bool RemoveThread(bool log = true)
{
    int callerId = Thread.CurrentThread.ManagedThreadId;
    return RemoveThread(callerId, log);
}
219
/// <summary>
/// Stops tracking the thread with the given managed thread ID and deregisters its stat.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to stop tracking.</param>
/// <param name="log">
/// If true then a successful removal is logged. The warning for an untracked ID
/// is emitted regardless of this flag.
/// </param>
/// <returns>True if the thread was tracked and has been removed, otherwise false.</returns>
private static bool RemoveThread(int threadID, bool log = true)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo tracked;
        bool isTracked = m_threads.TryGetValue(threadID, out tracked);

        if (!isTracked)
        {
            m_log.WarnFormat(
                "[WATCHDOG]: Requested to remove thread with ID {0} but this is not being monitored", threadID);

            return false;
        }

        if (log)
        {
            m_log.DebugFormat(
                "[WATCHDOG]: Removing thread {0}, ID {1}", tracked.Thread.Name, tracked.Thread.ManagedThreadId);
        }

        tracked.Cleanup();
        m_threads.Remove(threadID);

        return true;
    }
}
245
/// <summary>
/// Aborts a tracked thread and stops tracking it.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to abort.</param>
/// <returns>True if the thread was being tracked, otherwise false.</returns>
public static bool AbortThread(int threadID)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo target;
        if (!m_threads.TryGetValue(threadID, out target))
            return false;

        target.Thread.Abort();
        RemoveThread(threadID);

        return true;
    }
}
264
/// <summary>
/// Records a heartbeat for the thread with the given managed thread ID and clears its timed-out flag.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to mark as alive.</param>
private static void UpdateThread(int threadID)
{
    // The dictionary is read without taking the lock on purpose: heartbeats are far more
    // frequent than adding/removing threads, so speed wins here. TryGetValue is not thread
    // safe, hence the catch-all below; losing an occasional heartbeat is harmless.
    try
    {
        ThreadWatchdogInfo tracked;
        if (!m_threads.TryGetValue(threadID, out tracked))
        {
            m_log.WarnFormat("[WATCHDOG]: Asked to update thread {0} which is not being monitored", threadID);
            return;
        }

        tracked.LastTick = Environment.TickCount & Int32.MaxValue;
        tracked.IsTimedOut = false;
    }
    catch
    {
        // Deliberately ignored; see the note above.
    }
}
287
/// <summary>
/// Takes a snapshot of the currently watched threads for diagnostic purposes.
/// </summary>
/// <returns>
/// A new array holding the live tracking records (the records themselves are not copied).
/// </returns>
public static ThreadWatchdogInfo[] GetThreadsInfo()
{
    ThreadWatchdogInfo[] snapshot;

    lock (m_threads)
    {
        snapshot = Enumerable.ToArray(m_threads.Values);
    }

    return snapshot;
}
297
/// <summary>
/// Looks up the watchdog info of the calling thread.
/// </summary>
/// <returns>The watchdog info. null if the thread isn't being monitored.</returns>
public static ThreadWatchdogInfo GetCurrentThreadInfo()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;

    lock (m_threads)
    {
        // TryGetValue leaves 'found' null when the ID is not tracked.
        ThreadWatchdogInfo found;
        m_threads.TryGetValue(callerId, out found);
        return found;
    }
}
312
/// <summary>
/// Check watched threads. Fire alarm if appropriate.
/// </summary>
/// <remarks>
/// Runs on each tick of the one-shot watchdog timer and re-arms it when done.
/// Threads are only inspected while OnWatchdogTimeout has subscribers. Stopped threads
/// are removed from tracking and reported; threads that missed their timeout are flagged
/// and, if AlarmIfTimeout is set, reported as a copy of their record. Subscribers are
/// invoked after the thread table lock has been released. Afterwards the memory watchdog
/// (if enabled), the checks manager and the stats recorder are run.
/// </remarks>
/// <param name="sender">Timer that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
{
    try
    {
        int now = Environment.TickCount & Int32.MaxValue;
        int msElapsed = now - LastWatchdogThreadTick;

        if (msElapsed > WATCHDOG_INTERVAL_MS * 2)
            m_log.WarnFormat(
                "[WATCHDOG]: {0} ms since Watchdog last ran. Interval should be approximately {1} ms",
                msElapsed, WATCHDOG_INTERVAL_MS);

        LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

        // Read the event once so the subscriber list cannot change between the null check and the calls.
        Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout;

        if (callback != null)
        {
            List<ThreadWatchdogInfo> callbackInfos = null;

            lock (m_threads)
            {
                // Walk a snapshot: RemoveThread() below removes entries from m_threads, and
                // mutating a dictionary while enumerating it invalidates the enumerator
                // (InvalidOperationException on the next iteration).
                ThreadWatchdogInfo[] trackedThreads = m_threads.Values.ToArray();

                foreach (ThreadWatchdogInfo threadInfo in trackedThreads)
                {
                    if (threadInfo.Thread.ThreadState == ThreadState.Stopped)
                    {
                        RemoveThread(threadInfo.Thread.ManagedThreadId);

                        if (callbackInfos == null)
                            callbackInfos = new List<ThreadWatchdogInfo>();

                        callbackInfos.Add(threadInfo);
                    }
                    else if (!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
                    {
                        threadInfo.IsTimedOut = true;

                        if (threadInfo.AlarmIfTimeout)
                        {
                            if (callbackInfos == null)
                                callbackInfos = new List<ThreadWatchdogInfo>();

                            // Send a copy of the watchdog info to prevent race conditions where the watchdog
                            // thread updates the monitoring info after an alarm has been sent out.
                            callbackInfos.Add(new ThreadWatchdogInfo(threadInfo));
                        }
                    }
                }
            }

            if (callbackInfos != null)
                foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
                    callback(callbackInfo);
        }

        if (MemoryWatchdog.Enabled)
            MemoryWatchdog.Update();

        ChecksManager.CheckChecks();
        StatsManager.RecordStats();
    }
    finally
    {
        // The timer is one-shot (AutoReset is off), so it must be re-armed on every exit path;
        // otherwise a single exception from a subscriber or a check would stop the watchdog
        // for good.
        m_watchdogTimer.Start();
    }
}
379 }
380} \ No newline at end of file