diff options
Diffstat (limited to 'OpenSim/Framework/Monitoring/Watchdog.cs')
-rw-r--r-- | OpenSim/Framework/Monitoring/Watchdog.cs | 334 |
1 files changed, 334 insertions, 0 deletions
diff --git a/OpenSim/Framework/Monitoring/Watchdog.cs b/OpenSim/Framework/Monitoring/Watchdog.cs new file mode 100644 index 0000000..e4db964 --- /dev/null +++ b/OpenSim/Framework/Monitoring/Watchdog.cs | |||
@@ -0,0 +1,334 @@ | |||
1 | /* | ||
2 | * Copyright (c) Contributors, http://opensimulator.org/ | ||
3 | * See CONTRIBUTORS.TXT for a full list of copyright holders. | ||
4 | * | ||
5 | * Redistribution and use in source and binary forms, with or without | ||
6 | * modification, are permitted provided that the following conditions are met: | ||
7 | * * Redistributions of source code must retain the above copyright | ||
8 | * notice, this list of conditions and the following disclaimer. | ||
9 | * * Redistributions in binary form must reproduce the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer in the | ||
11 | * documentation and/or other materials provided with the distribution. | ||
12 | * * Neither the name of the OpenSimulator Project nor the | ||
13 | * names of its contributors may be used to endorse or promote products | ||
14 | * derived from this software without specific prior written permission. | ||
15 | * | ||
16 | * THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY | ||
17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
19 | * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY | ||
20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
26 | */ | ||
27 | |||
28 | using System; | ||
29 | using System.Collections.Generic; | ||
30 | using System.Linq; | ||
31 | using System.Threading; | ||
32 | using log4net; | ||
33 | |||
34 | namespace OpenSim.Framework.Monitoring | ||
35 | { | ||
36 | /// <summary> | ||
37 | /// Manages launching threads and keeping watch over them for timeouts | ||
38 | /// </summary> | ||
39 | public static class Watchdog | ||
40 | { | ||
41 | /// <summary>Timer interval in milliseconds for the watchdog timer</summary> | ||
42 | const double WATCHDOG_INTERVAL_MS = 2500.0d; | ||
43 | |||
44 | /// <summary>Default timeout in milliseconds before a thread is considered dead</summary> | ||
45 | public const int DEFAULT_WATCHDOG_TIMEOUT_MS = 5000; | ||
46 | |||
/// <summary>
/// Per-thread record holding the thread and the timing/alarm state kept for it.
/// </summary>
[System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
public class ThreadWatchdogInfo
{
    /// <summary>
    /// The thread this record describes.
    /// </summary>
    public Thread Thread { get; private set; }

    /// <summary>
    /// Tick count sampled when this record was constructed, masked to be non-negative.
    /// </summary>
    /// <remarks>
    /// Only approximate: the underlying tick counter wraps around quickly.
    /// </remarks>
    public int FirstTick { get; private set; }

    /// <summary>
    /// Tick count of the most recent heartbeat recorded for the thread.
    /// Starts out equal to <see cref="FirstTick"/>.
    /// </summary>
    public int LastTick { get; set; }

    /// <summary>
    /// How many milliseconds may pass before the thread is reported as having a problem.
    /// </summary>
    public int Timeout { get; set; }

    /// <summary>
    /// Whether the thread is currently regarded as timed out.
    /// </summary>
    public bool IsTimedOut { get; set; }

    /// <summary>
    /// Whether a timeout of this thread should trigger the alarm function.
    /// </summary>
    public bool AlarmIfTimeout { get; set; }

    /// <summary>
    /// Method to execute when the alarm goes off. When null, no alarm method is fired.
    /// </summary>
    public Func<string> AlarmMethod { get; set; }

    /// <summary>
    /// Creates a record for the given thread, stamping both tick properties with the current tick.
    /// </summary>
    /// <param name="thread">The thread being described</param>
    /// <param name="timeout">Timeout in milliseconds</param>
    public ThreadWatchdogInfo(Thread thread, int timeout)
    {
        // Mask off the sign bit so the stored tick is never negative.
        int createdAt = Environment.TickCount & Int32.MaxValue;

        Thread = thread;
        Timeout = timeout;
        FirstTick = createdAt;
        LastTick = createdAt;
    }
}
93 | |||
94 | /// <summary> | ||
95 | /// This event is called whenever a tracked thread is | ||
96 | /// stopped or has not called UpdateThread() in time | ||
97 | /// </summary> | ||
98 | public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout; | ||
99 | |||
100 | private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); | ||
101 | private static Dictionary<int, ThreadWatchdogInfo> m_threads; | ||
102 | private static System.Timers.Timer m_watchdogTimer; | ||
103 | |||
104 | /// <summary> | ||
105 | /// Last time the watchdog thread ran. | ||
106 | /// </summary> | ||
107 | /// <remarks> | ||
108 | /// Should run every WATCHDOG_INTERVAL_MS | ||
109 | /// </remarks> | ||
110 | public static int LastWatchdogThreadTick { get; private set; } | ||
111 | |||
/// <summary>
/// Creates the thread table and starts the watchdog timer.
/// </summary>
static Watchdog()
{
    m_threads = new Dictionary<int, ThreadWatchdogInfo>();

    // Seed the last-run tick now so the first timer pass does not report a long gap.
    LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

    // AutoReset is off, so the timer fires once per Start() call.
    System.Timers.Timer timer = new System.Timers.Timer(WATCHDOG_INTERVAL_MS);
    timer.AutoReset = false;
    timer.Elapsed += WatchdogTimerElapsed;
    m_watchdogTimer = timer;

    m_watchdogTimer.Start();
}
124 | |||
/// <summary>
/// Starts a new watchdog-tracked thread using the default timeout and no alarm method.
/// </summary>
/// <param name="start">The method that the new thread will execute</param>
/// <param name="name">Name to assign to the new thread</param>
/// <param name="priority">Priority the thread should run at</param>
/// <param name="isBackground">True to make the thread a background thread, otherwise false</param>
/// <param name="alarmIfTimeout">Trigger an alarm function if the thread has timed out</param>
/// <returns>The newly created Thread object</returns>
public static Thread StartThread(
    ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout)
{
    // This overload supplies no alarm method and uses the default timeout.
    Func<string> noAlarmMethod = null;

    return StartThread(
        start, name, priority, isBackground, alarmIfTimeout, noAlarmMethod, DEFAULT_WATCHDOG_TIMEOUT_MS);
}
139 | |||
/// <summary>
/// Creates a thread, registers it with the watchdog, and then starts it.
/// </summary>
/// <param name="start">The method that the new thread will execute</param>
/// <param name="name">Name to assign to the new thread</param>
/// <param name="priority">Priority the thread should run at</param>
/// <param name="isBackground">True to make the thread a background thread, otherwise false</param>
/// <param name="alarmIfTimeout">Trigger an alarm function if the thread has timed out</param>
/// <param name="alarmMethod">
/// Alarm method to call if alarmIfTimeout is true and there is a timeout.
/// Normally, this will just return some useful debugging information.
/// </param>
/// <param name="timeout">Number of milliseconds to wait before a timeout warning is issued</param>
/// <returns>The newly created Thread object</returns>
public static Thread StartThread(
    ThreadStart start, string name, ThreadPriority priority, bool isBackground,
    bool alarmIfTimeout, Func<string> alarmMethod, int timeout)
{
    Thread worker = new Thread(start)
    {
        Name = name,
        Priority = priority,
        IsBackground = isBackground
    };

    ThreadWatchdogInfo info = new ThreadWatchdogInfo(worker, timeout);
    info.AlarmIfTimeout = alarmIfTimeout;
    info.AlarmMethod = alarmMethod;

    int workerId = worker.ManagedThreadId;

    m_log.DebugFormat("[WATCHDOG]: Started tracking thread {0}, ID {1}", worker.Name, workerId);

    // Register the thread in the table before it is started.
    lock (m_threads)
    {
        m_threads.Add(workerId, info);
    }

    worker.Start();

    return worker;
}
178 | |||
/// <summary>
/// Records a heartbeat for the calling thread, marking it as alive.
/// </summary>
public static void UpdateThread()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;

    UpdateThread(callerId);
}
186 | |||
/// <summary>
/// Stops the watchdog from tracking the calling thread.
/// </summary>
/// <returns>
/// True if the calling thread was in the list of tracked threads
/// and has been removed, otherwise false
/// </returns>
public static bool RemoveThread()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;

    return RemoveThread(callerId);
}
198 | |||
/// <summary>
/// Removes the entry for the given managed thread ID from the tracked-thread table.
/// </summary>
/// <param name="threadID">Managed thread ID to stop tracking</param>
/// <returns>True if an entry was found and removed, otherwise false</returns>
private static bool RemoveThread(int threadID)
{
    bool wasTracked;

    lock (m_threads)
    {
        wasTracked = m_threads.Remove(threadID);
    }

    return wasTracked;
}
204 | |||
/// <summary>
/// Aborts a tracked thread and stops tracking it.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to abort</param>
/// <returns>
/// True if the thread was being tracked (it has then been aborted and removed),
/// false if no thread with that ID is being tracked
/// </returns>
public static bool AbortThread(int threadID)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo twi;

        // One lookup instead of ContainsKey followed by the indexer.
        if (!m_threads.TryGetValue(threadID, out twi))
            return false;

        twi.Thread.Abort();

        // The lock is already held here, so remove the entry directly.
        m_threads.Remove(threadID);

        return true;
    }
}
223 | |||
/// <summary>
/// Records a heartbeat for the thread with the given managed thread ID and clears its timed-out flag.
/// </summary>
/// <remarks>
/// Logs a warning if the thread is not being monitored. Any exception raised by the lookup
/// is logged at debug level and otherwise ignored.
/// </remarks>
/// <param name="threadID">Managed thread ID of the thread to mark as alive</param>
private static void UpdateThread(int threadID)
{
    ThreadWatchdogInfo threadInfo;

    // Although TryGetValue is not a thread safe operation, we use a try/catch here instead
    // of a lock for speed. Adding/removing threads is a very rare operation compared to
    // UpdateThread(), and a single UpdateThread() failure here and there won't break
    // anything
    try
    {
        if (m_threads.TryGetValue(threadID, out threadInfo))
        {
            threadInfo.LastTick = Environment.TickCount & Int32.MaxValue;
            threadInfo.IsTimedOut = false;
        }
        else
        {
            m_log.WarnFormat("[WATCHDOG]: Asked to update thread {0} which is not being monitored", threadID);
        }
    }
    catch (Exception ex)
    {
        // Still best effort, but leave a trace instead of discarding the failure silently.
        m_log.Debug(
            string.Format("[WATCHDOG]: Ignored failure while updating thread {0}", threadID), ex);
    }
}
246 | |||
/// <summary>
/// Takes a snapshot of the currently watched threads, for diagnostic purposes.
/// </summary>
/// <returns>A new array holding the info record of every tracked thread</returns>
public static ThreadWatchdogInfo[] GetThreadsInfo()
{
    ThreadWatchdogInfo[] snapshot;

    // Copy under the lock so the table cannot change while it is being read.
    lock (m_threads)
    {
        snapshot = m_threads.Values.ToArray();
    }

    return snapshot;
}
256 | |||
/// <summary>
/// Return the current thread's watchdog info.
/// </summary>
/// <returns>The watchdog info. null if the thread isn't being monitored.</returns>
public static ThreadWatchdogInfo GetCurrentThreadInfo()
{
    int currentId = Thread.CurrentThread.ManagedThreadId;
    ThreadWatchdogInfo threadInfo;

    lock (m_threads)
    {
        // One lookup instead of ContainsKey followed by the indexer.
        if (m_threads.TryGetValue(currentId, out threadInfo))
            return threadInfo;
    }

    return null;
}
271 | |||
/// <summary>
/// Check watched threads. Fire alarm if appropriate.
/// </summary>
/// <remarks>
/// Runs on each elapse of the watchdog timer. Warns if the handler has not run for more than
/// twice the expected interval. When OnWatchdogTimeout has subscribers, stopped threads are
/// removed from tracking and reported, and threads whose last heartbeat is older than their
/// timeout are flagged and reported if AlarmIfTimeout is set. The memory watchdog is then
/// updated if enabled. The timer is re-armed on every exit path, including when an exception
/// is thrown.
/// </remarks>
/// <param name="sender">Timer that raised the event (unused)</param>
/// <param name="e">Event arguments (unused)</param>
private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
{
    try
    {
        int now = Environment.TickCount & Int32.MaxValue;
        int msElapsed = now - LastWatchdogThreadTick;

        if (msElapsed > WATCHDOG_INTERVAL_MS * 2)
            m_log.WarnFormat(
                "[WATCHDOG]: {0} ms since Watchdog last ran. Interval should be approximately {1} ms",
                msElapsed, WATCHDOG_INTERVAL_MS);

        LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

        // Copy the delegate so a concurrent unsubscribe cannot null it between check and call.
        Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout;

        if (callback != null)
        {
            List<ThreadWatchdogInfo> callbackInfos = null;

            lock (m_threads)
            {
                // Enumerate a snapshot: stopped threads are removed from m_threads inside the
                // loop, and removing from a dictionary while enumerating it throws
                // InvalidOperationException.
                ThreadWatchdogInfo[] tracked = m_threads.Values.ToArray();

                foreach (ThreadWatchdogInfo threadInfo in tracked)
                {
                    if (threadInfo.Thread.ThreadState == ThreadState.Stopped)
                    {
                        m_threads.Remove(threadInfo.Thread.ManagedThreadId);

                        if (callbackInfos == null)
                            callbackInfos = new List<ThreadWatchdogInfo>();

                        callbackInfos.Add(threadInfo);
                    }
                    else if (!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
                    {
                        threadInfo.IsTimedOut = true;

                        if (threadInfo.AlarmIfTimeout)
                        {
                            if (callbackInfos == null)
                                callbackInfos = new List<ThreadWatchdogInfo>();

                            callbackInfos.Add(threadInfo);
                        }
                    }
                }
            }

            // Invoke subscribers outside the lock.
            if (callbackInfos != null)
                foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
                    callback(callbackInfo);
        }

        if (MemoryWatchdog.Enabled)
            MemoryWatchdog.Update();
    }
    finally
    {
        // The timer does not auto-reset, so it must be restarted even if something above
        // threw; otherwise the watchdog would never run again.
        m_watchdogTimer.Start();
    }
}
333 | } | ||
334 | } | ||