diff options
Diffstat (limited to 'OpenSim/Framework/Monitoring/Watchdog.cs')
-rw-r--r-- | OpenSim/Framework/Monitoring/Watchdog.cs | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/OpenSim/Framework/Monitoring/Watchdog.cs b/OpenSim/Framework/Monitoring/Watchdog.cs new file mode 100644 index 0000000..a644fa5 --- /dev/null +++ b/OpenSim/Framework/Monitoring/Watchdog.cs | |||
@@ -0,0 +1,380 @@ | |||
1 | /* | ||
2 | * Copyright (c) Contributors, http://opensimulator.org/ | ||
3 | * See CONTRIBUTORS.TXT for a full list of copyright holders. | ||
4 | * | ||
5 | * Redistribution and use in source and binary forms, with or without | ||
6 | * modification, are permitted provided that the following conditions are met: | ||
7 | * * Redistributions of source code must retain the above copyright | ||
8 | * notice, this list of conditions and the following disclaimer. | ||
9 | * * Redistributions in binary form must reproduce the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer in the | ||
11 | * documentation and/or other materials provided with the distribution. | ||
12 | * * Neither the name of the OpenSimulator Project nor the | ||
13 | * names of its contributors may be used to endorse or promote products | ||
14 | * derived from this software without specific prior written permission. | ||
15 | * | ||
16 | * THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY | ||
17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
19 | * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY | ||
20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
26 | */ | ||
27 | |||
28 | using System; | ||
29 | using System.Collections.Generic; | ||
30 | using System.Linq; | ||
31 | using System.Threading; | ||
32 | using log4net; | ||
33 | |||
34 | namespace OpenSim.Framework.Monitoring | ||
35 | { | ||
36 | /// <summary> | ||
37 | /// Manages launching threads and keeping watch over them for timeouts | ||
38 | /// </summary> | ||
39 | public static class Watchdog | ||
40 | { | ||
41 | private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); | ||
42 | |||
43 | /// <summary>Timer interval in milliseconds for the watchdog timer</summary> | ||
44 | public const double WATCHDOG_INTERVAL_MS = 2500.0d; | ||
45 | |||
46 | /// <summary>Default timeout in milliseconds before a thread is considered dead</summary> | ||
47 | public const int DEFAULT_WATCHDOG_TIMEOUT_MS = 5000; | ||
48 | |||
/// <summary>
/// Bookkeeping record for one watched thread: the thread itself, its heartbeat ticks,
/// its timeout and alarm settings, and the pull stat that reports time since the last heartbeat.
/// </summary>
[System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
public class ThreadWatchdogInfo
{
    /// <summary>
    /// The thread being watched.
    /// </summary>
    public Thread Thread { get; private set; }

    /// <summary>
    /// Approximate tick when this thread was started.
    /// </summary>
    /// <remarks>
    /// Not terribly good since this quickly wraps around.
    /// </remarks>
    public int FirstTick { get; private set; }

    /// <summary>
    /// Last time this heartbeat update was invoked
    /// </summary>
    public int LastTick { get; set; }

    /// <summary>
    /// Number of milliseconds before we notify that the thread is having a problem.
    /// </summary>
    public int Timeout { get; set; }

    /// <summary>
    /// Is this thread considered timed out?
    /// </summary>
    public bool IsTimedOut { get; set; }

    /// <summary>
    /// Will this thread trigger the alarm function if it has timed out?
    /// </summary>
    public bool AlarmIfTimeout { get; set; }

    /// <summary>
    /// Method to execute if the alarm goes off. If null then no alarm method is fired.
    /// </summary>
    public Func<string> AlarmMethod { get; set; }

    /// <summary>
    /// Stat structure associated with this thread. Null on instances made by the copy constructor.
    /// </summary>
    public Stat Stat { get; set; }

    /// <summary>
    /// Creates the record for a thread, stamps its first and last tick with the current
    /// (sign-masked) tick count, and registers a pull stat for it with StatsManager.
    /// </summary>
    /// <param name="thread">The thread to watch.</param>
    /// <param name="timeout">Milliseconds without a heartbeat before the thread is flagged.</param>
    /// <param name="name">Name used for the stat and in its description.</param>
    public ThreadWatchdogInfo(Thread thread, int timeout, string name)
    {
        Thread = thread;
        Timeout = timeout;

        // Masking with Int32.MaxValue clears the sign bit so the stored tick is never negative.
        FirstTick = Environment.TickCount & Int32.MaxValue;
        LastTick = FirstTick;

        Stat
            = new Stat(
                name,
                string.Format("Last update of thread {0}", name),
                "",
                "ms",
                "server",
                "thread",
                StatType.Pull,
                MeasuresOfInterest.None,
                // The parentheses matter: binary '-' binds tighter than '&', so without them the
                // expression would be TickCount & (Int32.MaxValue - LastTick) rather than the
                // milliseconds elapsed since the last heartbeat.
                stat => stat.Value = (Environment.TickCount & Int32.MaxValue) - LastTick,
                StatVerbosity.Debug);

        StatsManager.RegisterStat(Stat);
    }

    /// <summary>
    /// Creates a snapshot copy of another record.
    /// </summary>
    /// <remarks>
    /// The Stat is not copied, so the snapshot neither registers nor owns a stat.
    /// </remarks>
    /// <param name="previousTwi">The record to copy from.</param>
    public ThreadWatchdogInfo(ThreadWatchdogInfo previousTwi)
    {
        Thread = previousTwi.Thread;
        FirstTick = previousTwi.FirstTick;
        LastTick = previousTwi.LastTick;
        Timeout = previousTwi.Timeout;
        IsTimedOut = previousTwi.IsTimedOut;
        AlarmIfTimeout = previousTwi.AlarmIfTimeout;
        AlarmMethod = previousTwi.AlarmMethod;
    }

    /// <summary>
    /// Deregisters this record's stat from StatsManager, if it has one.
    /// </summary>
    public void Cleanup()
    {
        // Snapshot copies carry no Stat; skip the call rather than hand null to StatsManager.
        if (Stat != null)
            StatsManager.DeregisterStat(Stat);
    }
}
131 | |||
132 | /// <summary> | ||
133 | /// This event is called whenever a tracked thread is | ||
134 | /// stopped or has not called UpdateThread() in time | ||
135 | /// </summary> | ||
136 | public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout; | ||
137 | |||
/// <summary>
/// Gets or sets whether the watchdog timer is switched on.
/// </summary>
/// <remarks>
/// Assigning the value that is already in effect does nothing.
/// </remarks>
public static bool Enabled
{
    get
    {
        return m_enabled;
    }

    set
    {
        if (value != m_enabled)
        {
            m_enabled = value;

            if (m_enabled)
            {
                // Stamp the current tick on switch-on so the first timer pass
                // does not report a long gap since the watchdog last ran.
                LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;
            }

            m_watchdogTimer.Enabled = m_enabled;
        }
    }
}
162 | |||
163 | private static bool m_enabled; | ||
164 | private static Dictionary<int, ThreadWatchdogInfo> m_threads; | ||
165 | private static System.Timers.Timer m_watchdogTimer; | ||
166 | |||
167 | /// <summary> | ||
168 | /// Last time the watchdog thread ran. | ||
169 | /// </summary> | ||
170 | /// <remarks> | ||
171 | /// Should run every WATCHDOG_INTERVAL_MS | ||
172 | /// </remarks> | ||
173 | public static int LastWatchdogThreadTick { get; private set; } | ||
174 | |||
/// <summary>
/// Creates the empty thread table and the watchdog timer. The timer is configured
/// here but not started.
/// </summary>
static Watchdog()
{
    m_threads = new Dictionary<int, ThreadWatchdogInfo>();

    // AutoReset is off, so the timer fires once each time it is started;
    // the Elapsed handler is responsible for starting it again.
    System.Timers.Timer timer = new System.Timers.Timer(WATCHDOG_INTERVAL_MS);
    timer.AutoReset = false;
    timer.Elapsed += WatchdogTimerElapsed;

    m_watchdogTimer = timer;
}
182 | |||
/// <summary>
/// Starts tracking a thread, keyed by its managed thread ID.
/// </summary>
/// <param name="info">Information about the thread.</param>
/// <param name="name">Name of the thread; used only in the log message.</param>
/// <param name="log">If true then the start of tracking is logged at debug level.</param>
public static void AddThread(ThreadWatchdogInfo info, string name, bool log = true)
{
    int managedId = info.Thread.ManagedThreadId;

    if (log)
    {
        m_log.DebugFormat(
            "[WATCHDOG]: Started tracking thread {0}, ID {1}", name, managedId);
    }

    lock (m_threads)
    {
        // Dictionary.Add throws if this thread ID is already present.
        m_threads.Add(managedId, info);
    }
}
198 | |||
/// <summary>
/// Records a heartbeat for the calling thread.
/// </summary>
public static void UpdateThread()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;
    UpdateThread(callerId);
}
206 | |||
/// <summary>
/// Stops watchdog tracking of the calling thread.
/// </summary>
/// <param name="log">If true then a successful removal is logged at debug level.</param>
/// <returns>
/// True if the calling thread was tracked and has been removed, otherwise false.
/// </returns>
public static bool RemoveThread(bool log = true)
{
    int callerId = Thread.CurrentThread.ManagedThreadId;
    return RemoveThread(callerId, log);
}
219 | |||
/// <summary>
/// Stops tracking the thread with the given managed ID and cleans up its watchdog info.
/// </summary>
/// <param name="threadID">Managed thread ID to stop tracking.</param>
/// <param name="log">If true then a successful removal is logged at debug level.</param>
/// <returns>True if the thread was tracked and has been removed, otherwise false.</returns>
private static bool RemoveThread(int threadID, bool log = true)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo tracked;

        if (!m_threads.TryGetValue(threadID, out tracked))
        {
            // This warning is emitted regardless of the log flag.
            m_log.WarnFormat(
                "[WATCHDOG]: Requested to remove thread with ID {0} but this is not being monitored", threadID);

            return false;
        }

        if (log)
        {
            m_log.DebugFormat(
                "[WATCHDOG]: Removing thread {0}, ID {1}", tracked.Thread.Name, tracked.Thread.ManagedThreadId);
        }

        tracked.Cleanup();
        m_threads.Remove(threadID);

        return true;
    }
}
245 | |||
/// <summary>
/// Aborts a tracked thread and then stops tracking it.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to abort.</param>
/// <returns>True if the thread was being tracked, otherwise false.</returns>
public static bool AbortThread(int threadID)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo target;

        if (!m_threads.TryGetValue(threadID, out target))
            return false;

        target.Thread.Abort();
        RemoveThread(threadID);

        return true;
    }
}
264 | |||
/// <summary>
/// Records a heartbeat for the given thread: refreshes its last tick and clears its timed-out flag.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to mark as alive.</param>
private static void UpdateThread(int threadID)
{
    // The lookup is deliberately done without taking the m_threads lock, for speed.
    // TryGetValue is not thread safe, so any exception is swallowed instead: threads are
    // added and removed far less often than they are updated, and an occasional missed
    // update won't break anything.
    try
    {
        ThreadWatchdogInfo tracked;

        if (!m_threads.TryGetValue(threadID, out tracked))
        {
            m_log.WarnFormat("[WATCHDOG]: Asked to update thread {0} which is not being monitored", threadID);
            return;
        }

        tracked.LastTick = Environment.TickCount & Int32.MaxValue;
        tracked.IsTimedOut = false;
    }
    catch { }
}
287 | |||
/// <summary>
/// Returns the watchdog info of every currently tracked thread, for diagnostic purposes.
/// </summary>
/// <returns>A new array holding the tracked entries at the time of the call.</returns>
public static ThreadWatchdogInfo[] GetThreadsInfo()
{
    lock (m_threads)
    {
        ThreadWatchdogInfo[] snapshot = new ThreadWatchdogInfo[m_threads.Count];
        m_threads.Values.CopyTo(snapshot, 0);

        return snapshot;
    }
}
297 | |||
/// <summary>
/// Looks up the watchdog info of the calling thread.
/// </summary>
/// <returns>The watchdog info, or null if the calling thread isn't being monitored.</returns>
public static ThreadWatchdogInfo GetCurrentThreadInfo()
{
    ThreadWatchdogInfo found;

    lock (m_threads)
    {
        if (!m_threads.TryGetValue(Thread.CurrentThread.ManagedThreadId, out found))
            found = null;
    }

    return found;
}
312 | |||
/// <summary>
/// Timer callback: checks watched threads, fires the timeout event where appropriate,
/// runs the memory watchdog, checks and stats recording, then re-arms the timer.
/// </summary>
/// <remarks>
/// Threads are only examined when OnWatchdogTimeout has at least one subscriber.
/// </remarks>
/// <param name="sender">Unused.</param>
/// <param name="e">Unused.</param>
private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
{
    try
    {
        int now = Environment.TickCount & Int32.MaxValue;
        int msElapsed = now - LastWatchdogThreadTick;

        if (msElapsed > WATCHDOG_INTERVAL_MS * 2)
            m_log.WarnFormat(
                "[WATCHDOG]: {0} ms since Watchdog last ran.  Interval should be approximately {1} ms",
                msElapsed, WATCHDOG_INTERVAL_MS);

        LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

        Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout;

        if (callback != null)
        {
            List<ThreadWatchdogInfo> callbackInfos = null;
            List<int> stoppedThreadIds = null;

            lock (m_threads)
            {
                foreach (ThreadWatchdogInfo threadInfo in m_threads.Values)
                {
                    if (threadInfo.Thread.ThreadState == ThreadState.Stopped)
                    {
                        // Removal is deferred until the loop has finished: removing from
                        // m_threads while enumerating m_threads.Values invalidates the
                        // enumerator and makes the next iteration throw.
                        if (stoppedThreadIds == null)
                            stoppedThreadIds = new List<int>();

                        stoppedThreadIds.Add(threadInfo.Thread.ManagedThreadId);

                        if (callbackInfos == null)
                            callbackInfos = new List<ThreadWatchdogInfo>();

                        callbackInfos.Add(threadInfo);
                    }
                    else if (!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
                    {
                        threadInfo.IsTimedOut = true;

                        if (threadInfo.AlarmIfTimeout)
                        {
                            if (callbackInfos == null)
                                callbackInfos = new List<ThreadWatchdogInfo>();

                            // Send a copy of the watchdog info to prevent race conditions where the watchdog
                            // thread updates the monitoring info after an alarm has been sent out.
                            callbackInfos.Add(new ThreadWatchdogInfo(threadInfo));
                        }
                    }
                }

                // Still inside the lock (RemoveThread re-enters it on the same thread).
                if (stoppedThreadIds != null)
                    foreach (int stoppedThreadId in stoppedThreadIds)
                        RemoveThread(stoppedThreadId);
            }

            // Subscribers are invoked outside the lock.
            if (callbackInfos != null)
                foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
                    callback(callbackInfo);
        }

        if (MemoryWatchdog.Enabled)
            MemoryWatchdog.Update();

        ChecksManager.CheckChecks();
        StatsManager.RecordStats();
    }
    finally
    {
        // The timer is one-shot (AutoReset is false), so it must be re-armed even when
        // something above throws; otherwise the watchdog would stop running for good.
        m_watchdogTimer.Start();
    }
}
379 | } | ||
380 | } \ No newline at end of file | ||