aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/OpenSim/Framework/Monitoring/Watchdog.cs
diff options
context:
space:
mode:
Diffstat (limited to 'OpenSim/Framework/Monitoring/Watchdog.cs')
-rw-r--r--OpenSim/Framework/Monitoring/Watchdog.cs334
1 files changed, 334 insertions, 0 deletions
diff --git a/OpenSim/Framework/Monitoring/Watchdog.cs b/OpenSim/Framework/Monitoring/Watchdog.cs
new file mode 100644
index 0000000..e4db964
--- /dev/null
+++ b/OpenSim/Framework/Monitoring/Watchdog.cs
@@ -0,0 +1,334 @@
1/*
2 * Copyright (c) Contributors, http://opensimulator.org/
3 * See CONTRIBUTORS.TXT for a full list of copyright holders.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the OpenSimulator Project nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28using System;
29using System.Collections.Generic;
30using System.Linq;
31using System.Threading;
32using log4net;
33
namespace OpenSim.Framework.Monitoring
{
    /// <summary>
    /// Manages launching threads and keeping watch over them for timeouts.
    /// </summary>
    /// <remarks>
    /// Threads started through <see cref="StartThread(ThreadStart, string, ThreadPriority, bool, bool)"/>
    /// are registered by managed thread ID.  Each tracked thread is expected to call
    /// <see cref="UpdateThread()"/> periodically; a one-shot timer re-armed every
    /// WATCHDOG_INTERVAL_MS checks the registered threads and raises
    /// <see cref="OnWatchdogTimeout"/> for stopped or overdue ones.
    /// </remarks>
    public static class Watchdog
    {
        /// <summary>Timer interval in milliseconds for the watchdog timer</summary>
        const double WATCHDOG_INTERVAL_MS = 2500.0d;

        /// <summary>Default timeout in milliseconds before a thread is considered dead</summary>
        public const int DEFAULT_WATCHDOG_TIMEOUT_MS = 5000;

        /// <summary>
        /// Bookkeeping record for a single thread tracked by the watchdog.
        /// </summary>
        [System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
        public class ThreadWatchdogInfo
        {
            /// <summary>
            /// The thread being tracked.
            /// </summary>
            public Thread Thread { get; private set; }

            /// <summary>
            /// Approximate tick when this thread was started.
            /// </summary>
            /// <remarks>
            /// Not terribly good since this quickly wraps around.
            /// </remarks>
            public int FirstTick { get; private set; }

            /// <summary>
            /// Last time this heartbeat update was invoked
            /// </summary>
            public int LastTick { get; set; }

            /// <summary>
            /// Number of milliseconds before we notify that the thread is having a problem.
            /// </summary>
            public int Timeout { get; set; }

            /// <summary>
            /// Is this thread considered timed out?
            /// </summary>
            public bool IsTimedOut { get; set; }

            /// <summary>
            /// Will this thread trigger the alarm function if it has timed out?
            /// </summary>
            public bool AlarmIfTimeout { get; set; }

            /// <summary>
            /// Method to execute if the alarm goes off.  If null then no alarm method is fired.
            /// </summary>
            public Func<string> AlarmMethod { get; set; }

            /// <summary>
            /// Create a tracking record for a thread.
            /// </summary>
            /// <param name="thread">The thread to track</param>
            /// <param name="timeout">Number of milliseconds without a heartbeat before the thread is considered timed out</param>
            public ThreadWatchdogInfo(Thread thread, int timeout)
            {
                Thread = thread;
                Timeout = timeout;

                // Mask off the sign bit so that tick values are always non-negative.
                FirstTick = Environment.TickCount & Int32.MaxValue;
                LastTick = FirstTick;
            }
        }

        /// <summary>
        /// This event is called whenever a tracked thread is
        /// stopped or has not called UpdateThread() in time
        /// </summary>
        public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout;

        private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);

        /// <summary>
        /// Tracked threads keyed by managed thread ID.  Also used as the lock object for
        /// every structural change to the collection.
        /// </summary>
        private static readonly Dictionary<int, ThreadWatchdogInfo> m_threads;

        /// <summary>
        /// One-shot timer driving the periodic check; restarted at the end of every run.
        /// </summary>
        private static readonly System.Timers.Timer m_watchdogTimer;

        /// <summary>
        /// Last time the watchdog thread ran.
        /// </summary>
        /// <remarks>
        /// Should run every WATCHDOG_INTERVAL_MS
        /// </remarks>
        public static int LastWatchdogThreadTick { get; private set; }

        static Watchdog()
        {
            m_threads = new Dictionary<int, ThreadWatchdogInfo>();
            m_watchdogTimer = new System.Timers.Timer(WATCHDOG_INTERVAL_MS);

            // One-shot: the handler re-arms the timer itself, so runs can never overlap.
            m_watchdogTimer.AutoReset = false;
            m_watchdogTimer.Elapsed += WatchdogTimerElapsed;

            // Set now so we don't get alerted on the first run
            LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

            m_watchdogTimer.Start();
        }

        /// <summary>
        /// Start a new thread that is tracked by the watchdog timer.
        /// </summary>
        /// <remarks>
        /// Uses no alarm method and a timeout of DEFAULT_WATCHDOG_TIMEOUT_MS.
        /// </remarks>
        /// <param name="start">The method that will be executed in a new thread</param>
        /// <param name="name">A name to give to the new thread</param>
        /// <param name="priority">Priority to run the thread at</param>
        /// <param name="isBackground">True to run this thread as a background thread, otherwise false</param>
        /// <param name="alarmIfTimeout">Trigger an alarm function if we have timed out</param>
        /// <returns>The newly created Thread object</returns>
        public static Thread StartThread(
            ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout)
        {
            return StartThread(start, name, priority, isBackground, alarmIfTimeout, null, DEFAULT_WATCHDOG_TIMEOUT_MS);
        }

        /// <summary>
        /// Start a new thread that is tracked by the watchdog timer
        /// </summary>
        /// <param name="start">The method that will be executed in a new thread</param>
        /// <param name="name">A name to give to the new thread</param>
        /// <param name="priority">Priority to run the thread at</param>
        /// <param name="isBackground">True to run this thread as a background
        /// thread, otherwise false</param>
        /// <param name="alarmIfTimeout">Trigger an alarm function if we have timed out</param>
        /// <param name="alarmMethod">
        /// Alarm method to call if alarmIfTimeout is true and there is a timeout.
        /// Normally, this will just return some useful debugging information.
        /// </param>
        /// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param>
        /// <returns>The newly created Thread object</returns>
        public static Thread StartThread(
            ThreadStart start, string name, ThreadPriority priority, bool isBackground,
            bool alarmIfTimeout, Func<string> alarmMethod, int timeout)
        {
            Thread thread = new Thread(start);
            thread.Name = name;
            thread.Priority = priority;
            thread.IsBackground = isBackground;

            ThreadWatchdogInfo twi
                = new ThreadWatchdogInfo(thread, timeout)
                    { AlarmIfTimeout = alarmIfTimeout, AlarmMethod = alarmMethod };

            m_log.DebugFormat(
                "[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId);

            // Register before starting so the thread is already tracked when its body
            // makes its first UpdateThread() call.
            lock (m_threads)
                m_threads.Add(twi.Thread.ManagedThreadId, twi);

            thread.Start();

            return thread;
        }

        /// <summary>
        /// Marks the current thread as alive
        /// </summary>
        public static void UpdateThread()
        {
            UpdateThread(Thread.CurrentThread.ManagedThreadId);
        }

        /// <summary>
        /// Stops watchdog tracking on the current thread
        /// </summary>
        /// <returns>
        /// True if the thread was removed from the list of tracked
        /// threads, otherwise false
        /// </returns>
        public static bool RemoveThread()
        {
            return RemoveThread(Thread.CurrentThread.ManagedThreadId);
        }

        /// <summary>
        /// Stops watchdog tracking of the thread with the given managed thread ID.
        /// </summary>
        /// <param name="threadID">Managed thread ID of the thread to stop tracking</param>
        /// <returns>True if an entry was removed, otherwise false</returns>
        private static bool RemoveThread(int threadID)
        {
            lock (m_threads)
                return m_threads.Remove(threadID);
        }

        /// <summary>
        /// Aborts a tracked thread and stops tracking it.
        /// </summary>
        /// <param name="threadID">Managed thread ID of the thread to abort</param>
        /// <returns>
        /// True if the thread was being tracked (it has then been aborted and removed),
        /// false if no thread with this ID is tracked
        /// </returns>
        public static bool AbortThread(int threadID)
        {
            lock (m_threads)
            {
                ThreadWatchdogInfo twi;

                // Single lookup instead of ContainsKey followed by the indexer.
                if (!m_threads.TryGetValue(threadID, out twi))
                    return false;

                twi.Thread.Abort();
                m_threads.Remove(threadID);

                return true;
            }
        }

        /// <summary>
        /// Records a heartbeat for the thread with the given managed thread ID and clears
        /// its timed-out flag.  Logs a warning if the thread is not being tracked.
        /// </summary>
        /// <param name="threadID">Managed thread ID of the thread to mark as alive</param>
        private static void UpdateThread(int threadID)
        {
            ThreadWatchdogInfo threadInfo;

            // Although TryGetValue is not a thread safe operation, we use a try/catch here instead
            // of a lock for speed. Adding/removing threads is a very rare operation compared to
            // UpdateThread(), and a single UpdateThread() failure here and there won't break
            // anything
            try
            {
                if (m_threads.TryGetValue(threadID, out threadInfo))
                {
                    threadInfo.LastTick = Environment.TickCount & Int32.MaxValue;
                    threadInfo.IsTimedOut = false;
                }
                else
                {
                    m_log.WarnFormat("[WATCHDOG]: Asked to update thread {0} which is not being monitored", threadID);
                }
            }
            catch { } // Deliberate best effort, see the comment above.
        }

        /// <summary>
        /// Get currently watched threads for diagnostic purposes
        /// </summary>
        /// <returns>A snapshot array of the tracking records of all currently tracked threads</returns>
        public static ThreadWatchdogInfo[] GetThreadsInfo()
        {
            lock (m_threads)
                return m_threads.Values.ToArray();
        }

        /// <summary>
        /// Return the current thread's watchdog info.
        /// </summary>
        /// <returns>The watchdog info. null if the thread isn't being monitored.</returns>
        public static ThreadWatchdogInfo GetCurrentThreadInfo()
        {
            lock (m_threads)
            {
                ThreadWatchdogInfo threadInfo;

                // Single lookup instead of ContainsKey followed by the indexer.
                if (m_threads.TryGetValue(Thread.CurrentThread.ManagedThreadId, out threadInfo))
                    return threadInfo;
            }

            return null;
        }

        /// <summary>
        /// Check watched threads. Fire alarm if appropriate.
        /// </summary>
        /// <remarks>
        /// Tracked threads are only inspected while at least one handler is subscribed to
        /// <see cref="OnWatchdogTimeout"/>.
        /// </remarks>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
        {
            try
            {
                int now = Environment.TickCount & Int32.MaxValue;
                int msElapsed = now - LastWatchdogThreadTick;

                if (msElapsed > WATCHDOG_INTERVAL_MS * 2)
                    m_log.WarnFormat(
                        "[WATCHDOG]: {0} ms since Watchdog last ran. Interval should be approximately {1} ms",
                        msElapsed, WATCHDOG_INTERVAL_MS);

                LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

                // Copy the delegate so that a concurrent unsubscribe cannot null it between
                // the check and the invocation.
                Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout;

                if (callback != null)
                {
                    List<ThreadWatchdogInfo> callbackInfos = CollectThreadsToReport(now);

                    // Invoke subscribers outside the lock so that a slow handler cannot
                    // block StartThread()/RemoveThread() on other threads.
                    if (callbackInfos != null)
                        foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
                            callback(callbackInfo);
                }

                if (MemoryWatchdog.Enabled)
                    MemoryWatchdog.Update();
            }
            finally
            {
                // The timer is one-shot (AutoReset is false), so it only fires again if it
                // is restarted here.  Doing so in a finally block keeps the watchdog alive
                // even when a subscriber or the checks above throw.
                m_watchdogTimer.Start();
            }
        }

        /// <summary>
        /// Removes stopped threads from tracking and flags threads whose last heartbeat is
        /// older than their timeout, returning the records that subscribers should be told about.
        /// </summary>
        /// <param name="now">Tick value taken at the start of the current watchdog run</param>
        /// <returns>
        /// Stopped threads plus newly timed out threads that have AlarmIfTimeout set;
        /// null if there is nothing to report
        /// </returns>
        private static List<ThreadWatchdogInfo> CollectThreadsToReport(int now)
        {
            List<ThreadWatchdogInfo> callbackInfos = null;

            lock (m_threads)
            {
                // Enumerate a snapshot: stopped threads are removed from m_threads inside the
                // loop, and changing a Dictionary while enumerating it directly invalidates
                // the enumerator.
                List<ThreadWatchdogInfo> trackedThreads = new List<ThreadWatchdogInfo>(m_threads.Values);

                foreach (ThreadWatchdogInfo threadInfo in trackedThreads)
                {
                    if (threadInfo.Thread.ThreadState == ThreadState.Stopped)
                    {
                        m_threads.Remove(threadInfo.Thread.ManagedThreadId);

                        if (callbackInfos == null)
                            callbackInfos = new List<ThreadWatchdogInfo>();

                        callbackInfos.Add(threadInfo);
                    }
                    else if (!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
                    {
                        // Flag once; UpdateThread() clears the flag again, so a thread is
                        // reported at most once per missed-heartbeat episode.
                        threadInfo.IsTimedOut = true;

                        if (threadInfo.AlarmIfTimeout)
                        {
                            if (callbackInfos == null)
                                callbackInfos = new List<ThreadWatchdogInfo>();

                            callbackInfos.Add(threadInfo);
                        }
                    }
                }
            }

            return callbackInfos;
        }
    }
}