diff options
author | Justin Clark-Casey (justincc) | 2012-06-07 02:44:13 +0100 |
---|---|---|
committer | Justin Clark-Casey (justincc) | 2012-06-07 02:44:13 +0100 |
commit | 98b46d48fefc7801b7c9e496000e9329f4266e5e (patch) | |
tree | e0236812dce3255874a4521993040c1753bd339c | |
parent | minor: Change log messages on Warp3DImageModule to show they are from this mo... (diff) | |
download | opensim-SC-98b46d48fefc7801b7c9e496000e9329f4266e5e.zip opensim-SC-98b46d48fefc7801b7c9e496000e9329f4266e5e.tar.gz opensim-SC-98b46d48fefc7801b7c9e496000e9329f4266e5e.tar.bz2 opensim-SC-98b46d48fefc7801b7c9e496000e9329f4266e5e.tar.xz |
Allow the thread watchdog to accept an alarm method that is invoked if the timeout is breached.
This alarm can then invoke this to log extra information.
This is used in LLUDPServer to show which client was being processed when incoming and outgoing udp watchdog alarms are triggered.
-rw-r--r-- | OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs | 2 | ||||
-rw-r--r-- | OpenSim/Framework/Watchdog.cs | 40 | ||||
-rw-r--r-- | OpenSim/Region/Application/OpenSim.cs | 10 | ||||
-rw-r--r-- | OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs | 69 |
4 files changed, 96 insertions, 25 deletions
diff --git a/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs b/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs index 0062d4e..f96fd1f 100644 --- a/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs +++ b/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs | |||
@@ -66,6 +66,7 @@ namespace OpenSim.Framework.Servers.HttpServer | |||
66 | ThreadPriority.Normal, | 66 | ThreadPriority.Normal, |
67 | false, | 67 | false, |
68 | true, | 68 | true, |
69 | null, | ||
69 | int.MaxValue); | 70 | int.MaxValue); |
70 | } | 71 | } |
71 | 72 | ||
@@ -75,6 +76,7 @@ namespace OpenSim.Framework.Servers.HttpServer | |||
75 | ThreadPriority.Normal, | 76 | ThreadPriority.Normal, |
76 | false, | 77 | false, |
77 | true, | 78 | true, |
79 | null, | ||
78 | 1000 * 60 * 10); | 80 | 1000 * 60 * 10); |
79 | } | 81 | } |
80 | 82 | ||
diff --git a/OpenSim/Framework/Watchdog.cs b/OpenSim/Framework/Watchdog.cs index e93e50e..7552cd1 100644 --- a/OpenSim/Framework/Watchdog.cs +++ b/OpenSim/Framework/Watchdog.cs | |||
@@ -42,7 +42,7 @@ namespace OpenSim.Framework | |||
42 | const double WATCHDOG_INTERVAL_MS = 2500.0d; | 42 | const double WATCHDOG_INTERVAL_MS = 2500.0d; |
43 | 43 | ||
44 | /// <summary>Maximum timeout in milliseconds before a thread is considered dead</summary> | 44 | /// <summary>Maximum timeout in milliseconds before a thread is considered dead</summary> |
45 | const int WATCHDOG_TIMEOUT_MS = 5000; | 45 | public const int WATCHDOG_TIMEOUT_MS = 5000; |
46 | 46 | ||
47 | [System.Diagnostics.DebuggerDisplay("{Thread.Name}")] | 47 | [System.Diagnostics.DebuggerDisplay("{Thread.Name}")] |
48 | public class ThreadWatchdogInfo | 48 | public class ThreadWatchdogInfo |
@@ -58,7 +58,7 @@ namespace OpenSim.Framework | |||
58 | public int FirstTick { get; private set; } | 58 | public int FirstTick { get; private set; } |
59 | 59 | ||
60 | /// <summary> | 60 | /// <summary> |
61 | /// First time this heartbeat update was invoked | 61 | /// Last time this heartbeat update was invoked |
62 | /// </summary> | 62 | /// </summary> |
63 | public int LastTick { get; set; } | 63 | public int LastTick { get; set; } |
64 | 64 | ||
@@ -77,6 +77,11 @@ namespace OpenSim.Framework | |||
77 | /// </summary> | 77 | /// </summary> |
78 | public bool AlarmIfTimeout { get; set; } | 78 | public bool AlarmIfTimeout { get; set; } |
79 | 79 | ||
80 | /// <summary> | ||
81 | /// Method execute if alarm goes off. If null then no alarm method is fired. | ||
82 | /// </summary> | ||
83 | public Func<string> AlarmMethod { get; set; } | ||
84 | |||
80 | public ThreadWatchdogInfo(Thread thread, int timeout) | 85 | public ThreadWatchdogInfo(Thread thread, int timeout) |
81 | { | 86 | { |
82 | Thread = thread; | 87 | Thread = thread; |
@@ -87,16 +92,10 @@ namespace OpenSim.Framework | |||
87 | } | 92 | } |
88 | 93 | ||
89 | /// <summary> | 94 | /// <summary> |
90 | /// This event is called whenever a tracked thread is stopped or | 95 | /// This event is called whenever a tracked thread is |
91 | /// has not called UpdateThread() in time | 96 | /// stopped or has not called UpdateThread() in time< |
92 | /// </summary> | 97 | /// /summary> |
93 | /// <param name="thread">The thread that has been identified as dead</param> | 98 | public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout; |
94 | /// <param name="lastTick">The last time this thread called UpdateThread()</param> | ||
95 | public delegate void WatchdogTimeout(Thread thread, int lastTick); | ||
96 | |||
97 | /// <summary>This event is called whenever a tracked thread is | ||
98 | /// stopped or has not called UpdateThread() in time</summary> | ||
99 | public static event WatchdogTimeout OnWatchdogTimeout; | ||
100 | 99 | ||
101 | private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); | 100 | private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); |
102 | private static Dictionary<int, ThreadWatchdogInfo> m_threads; | 101 | private static Dictionary<int, ThreadWatchdogInfo> m_threads; |
@@ -123,7 +122,7 @@ namespace OpenSim.Framework | |||
123 | public static Thread StartThread( | 122 | public static Thread StartThread( |
124 | ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout) | 123 | ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout) |
125 | { | 124 | { |
126 | return StartThread(start, name, priority, isBackground, alarmIfTimeout, WATCHDOG_TIMEOUT_MS); | 125 | return StartThread(start, name, priority, isBackground, alarmIfTimeout, null, WATCHDOG_TIMEOUT_MS); |
127 | } | 126 | } |
128 | 127 | ||
129 | /// <summary> | 128 | /// <summary> |
@@ -135,17 +134,24 @@ namespace OpenSim.Framework | |||
135 | /// <param name="isBackground">True to run this thread as a background | 134 | /// <param name="isBackground">True to run this thread as a background |
136 | /// thread, otherwise false</param> | 135 | /// thread, otherwise false</param> |
137 | /// <param name="alarmIfTimeout">Trigger an alarm function is we have timed out</param> | 136 | /// <param name="alarmIfTimeout">Trigger an alarm function is we have timed out</param> |
137 | /// <param name="alarmMethod"> | ||
138 | /// Alarm method to call if alarmIfTimeout is true and there is a timeout. | ||
139 | /// Normally, this will just return some useful debugging information. | ||
140 | /// </param> | ||
138 | /// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param> | 141 | /// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param> |
139 | /// <returns>The newly created Thread object</returns> | 142 | /// <returns>The newly created Thread object</returns> |
140 | public static Thread StartThread( | 143 | public static Thread StartThread( |
141 | ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout, int timeout) | 144 | ThreadStart start, string name, ThreadPriority priority, bool isBackground, |
145 | bool alarmIfTimeout, Func<string> alarmMethod, int timeout) | ||
142 | { | 146 | { |
143 | Thread thread = new Thread(start); | 147 | Thread thread = new Thread(start); |
144 | thread.Name = name; | 148 | thread.Name = name; |
145 | thread.Priority = priority; | 149 | thread.Priority = priority; |
146 | thread.IsBackground = isBackground; | 150 | thread.IsBackground = isBackground; |
147 | 151 | ||
148 | ThreadWatchdogInfo twi = new ThreadWatchdogInfo(thread, timeout) { AlarmIfTimeout = alarmIfTimeout }; | 152 | ThreadWatchdogInfo twi |
153 | = new ThreadWatchdogInfo(thread, timeout) | ||
154 | { AlarmIfTimeout = alarmIfTimeout, AlarmMethod = alarmMethod }; | ||
149 | 155 | ||
150 | m_log.DebugFormat( | 156 | m_log.DebugFormat( |
151 | "[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId); | 157 | "[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId); |
@@ -258,7 +264,7 @@ namespace OpenSim.Framework | |||
258 | /// <param name="e"></param> | 264 | /// <param name="e"></param> |
259 | private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e) | 265 | private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e) |
260 | { | 266 | { |
261 | WatchdogTimeout callback = OnWatchdogTimeout; | 267 | Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout; |
262 | 268 | ||
263 | if (callback != null) | 269 | if (callback != null) |
264 | { | 270 | { |
@@ -296,7 +302,7 @@ namespace OpenSim.Framework | |||
296 | 302 | ||
297 | if (callbackInfos != null) | 303 | if (callbackInfos != null) |
298 | foreach (ThreadWatchdogInfo callbackInfo in callbackInfos) | 304 | foreach (ThreadWatchdogInfo callbackInfo in callbackInfos) |
299 | callback(callbackInfo.Thread, callbackInfo.LastTick); | 305 | callback(callbackInfo); |
300 | } | 306 | } |
301 | 307 | ||
302 | m_watchdogTimer.Start(); | 308 | m_watchdogTimer.Start(); |
diff --git a/OpenSim/Region/Application/OpenSim.cs b/OpenSim/Region/Application/OpenSim.cs index 76ac827..caba236 100644 --- a/OpenSim/Region/Application/OpenSim.cs +++ b/OpenSim/Region/Application/OpenSim.cs | |||
@@ -438,12 +438,16 @@ namespace OpenSim | |||
438 | } | 438 | } |
439 | } | 439 | } |
440 | 440 | ||
441 | private void WatchdogTimeoutHandler(System.Threading.Thread thread, int lastTick) | 441 | private void WatchdogTimeoutHandler(Watchdog.ThreadWatchdogInfo twi) |
442 | { | 442 | { |
443 | int now = Environment.TickCount & Int32.MaxValue; | 443 | int now = Environment.TickCount & Int32.MaxValue; |
444 | 444 | ||
445 | m_log.ErrorFormat("[WATCHDOG]: Timeout detected for thread \"{0}\". ThreadState={1}. Last tick was {2}ms ago", | 445 | m_log.ErrorFormat( |
446 | thread.Name, thread.ThreadState, now - lastTick); | 446 | "[WATCHDOG]: Timeout detected for thread \"{0}\". ThreadState={1}. Last tick was {2}ms ago. {3}", |
447 | twi.Thread.Name, | ||
448 | twi.Thread.ThreadState, | ||
449 | now - twi.LastTick, | ||
450 | twi.AlarmMethod != null ? string.Format("Data: {0}", twi.AlarmMethod()) : ""); | ||
447 | } | 451 | } |
448 | 452 | ||
449 | #region Console Commands | 453 | #region Console Commands |
diff --git a/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs b/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs index 32ba590..e37adb8 100644 --- a/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs +++ b/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs | |||
@@ -163,6 +163,16 @@ namespace OpenSim.Region.ClientStack.LindenUDP | |||
163 | 163 | ||
164 | private int m_malformedCount = 0; // Guard against a spamming attack | 164 | private int m_malformedCount = 0; // Guard against a spamming attack |
165 | 165 | ||
166 | /// <summary> | ||
167 | /// Record current outgoing client for monitoring purposes. | ||
168 | /// </summary> | ||
169 | private IClientAPI m_currentOutgoingClient; | ||
170 | |||
171 | /// <summary> | ||
172 | /// Recording current incoming client for monitoring purposes. | ||
173 | /// </summary> | ||
174 | private IClientAPI m_currentIncomingClient; | ||
175 | |||
166 | public LLUDPServer(IPAddress listenIP, ref uint port, int proxyPortOffsetParm, bool allow_alternate_port, IConfigSource configSource, AgentCircuitManager circuitManager) | 176 | public LLUDPServer(IPAddress listenIP, ref uint port, int proxyPortOffsetParm, bool allow_alternate_port, IConfigSource configSource, AgentCircuitManager circuitManager) |
167 | : base(listenIP, (int)port) | 177 | : base(listenIP, (int)port) |
168 | { | 178 | { |
@@ -244,19 +254,56 @@ namespace OpenSim.Region.ClientStack.LindenUDP | |||
244 | if (m_scene == null) | 254 | if (m_scene == null) |
245 | throw new InvalidOperationException("[LLUDPSERVER]: Cannot LLUDPServer.Start() without an IScene reference"); | 255 | throw new InvalidOperationException("[LLUDPSERVER]: Cannot LLUDPServer.Start() without an IScene reference"); |
246 | 256 | ||
247 | m_log.Info("[LLUDPSERVER]: Starting the LLUDP server in " + (m_asyncPacketHandling ? "asynchronous" : "synchronous") + " mode"); | 257 | m_log.InfoFormat( |
258 | "[LLUDPSERVER]: Starting the LLUDP server in {0} mode", | ||
259 | m_asyncPacketHandling ? "asynchronous" : "synchronous"); | ||
248 | 260 | ||
249 | base.Start(m_recvBufferSize, m_asyncPacketHandling); | 261 | base.Start(m_recvBufferSize, m_asyncPacketHandling); |
250 | 262 | ||
251 | // Start the packet processing threads | 263 | // Start the packet processing threads |
252 | Watchdog.StartThread( | 264 | Watchdog.StartThread( |
253 | IncomingPacketHandler, "Incoming Packets (" + m_scene.RegionInfo.RegionName + ")", ThreadPriority.Normal, false, true); | 265 | IncomingPacketHandler, |
266 | string.Format("Incoming Packets ({0})", m_scene.RegionInfo.RegionName), | ||
267 | ThreadPriority.Normal, | ||
268 | false, | ||
269 | true, | ||
270 | GetWatchdogIncomingAlarmData, | ||
271 | Watchdog.WATCHDOG_TIMEOUT_MS); | ||
272 | |||
254 | Watchdog.StartThread( | 273 | Watchdog.StartThread( |
255 | OutgoingPacketHandler, "Outgoing Packets (" + m_scene.RegionInfo.RegionName + ")", ThreadPriority.Normal, false, true); | 274 | OutgoingPacketHandler, |
275 | string.Format("Outgoing Packets ({0})", m_scene.RegionInfo.RegionName), | ||
276 | ThreadPriority.Normal, | ||
277 | false, | ||
278 | true, | ||
279 | GetWatchdogOutgoingAlarmData, | ||
280 | Watchdog.WATCHDOG_TIMEOUT_MS); | ||
256 | 281 | ||
257 | m_elapsedMSSinceLastStatReport = Environment.TickCount; | 282 | m_elapsedMSSinceLastStatReport = Environment.TickCount; |
258 | } | 283 | } |
259 | 284 | ||
285 | /// <summary> | ||
286 | /// If the outgoing UDP thread times out, then return client that was being processed to help with debugging. | ||
287 | /// </summary> | ||
288 | /// <returns></returns> | ||
289 | private string GetWatchdogIncomingAlarmData() | ||
290 | { | ||
291 | return string.Format( | ||
292 | "Client is {0}", | ||
293 | m_currentIncomingClient != null ? m_currentIncomingClient.Name : "none"); | ||
294 | } | ||
295 | |||
296 | /// <summary> | ||
297 | /// If the outgoing UDP thread times out, then return client that was being processed to help with debugging. | ||
298 | /// </summary> | ||
299 | /// <returns></returns> | ||
300 | private string GetWatchdogOutgoingAlarmData() | ||
301 | { | ||
302 | return string.Format( | ||
303 | "Client is {0}", | ||
304 | m_currentOutgoingClient != null ? m_currentOutgoingClient.Name : "none"); | ||
305 | } | ||
306 | |||
260 | public new void Stop() | 307 | public new void Stop() |
261 | { | 308 | { |
262 | m_log.Info("[LLUDPSERVER]: Shutting down the LLUDP server for " + m_scene.RegionInfo.RegionName); | 309 | m_log.Info("[LLUDPSERVER]: Shutting down the LLUDP server for " + m_scene.RegionInfo.RegionName); |
@@ -1173,6 +1220,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP | |||
1173 | // client. m_packetSent will be set to true if a packet is sent | 1220 | // client. m_packetSent will be set to true if a packet is sent |
1174 | m_scene.ForEachClient(clientPacketHandler); | 1221 | m_scene.ForEachClient(clientPacketHandler); |
1175 | 1222 | ||
1223 | m_currentOutgoingClient = null; | ||
1224 | |||
1176 | // If nothing was sent, sleep for the minimum amount of time before a | 1225 | // If nothing was sent, sleep for the minimum amount of time before a |
1177 | // token bucket could get more tokens | 1226 | // token bucket could get more tokens |
1178 | if (!m_packetSent) | 1227 | if (!m_packetSent) |
@@ -1191,6 +1240,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP | |||
1191 | 1240 | ||
1192 | private void ClientOutgoingPacketHandler(IClientAPI client) | 1241 | private void ClientOutgoingPacketHandler(IClientAPI client) |
1193 | { | 1242 | { |
1243 | m_currentOutgoingClient = client; | ||
1244 | |||
1194 | try | 1245 | try |
1195 | { | 1246 | { |
1196 | if (client is LLClientView) | 1247 | if (client is LLClientView) |
@@ -1216,8 +1267,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP | |||
1216 | } | 1267 | } |
1217 | catch (Exception ex) | 1268 | catch (Exception ex) |
1218 | { | 1269 | { |
1219 | m_log.Error("[LLUDPSERVER]: OutgoingPacketHandler iteration for " + client.Name + | 1270 | m_log.Error( |
1220 | " threw an exception: " + ex.Message, ex); | 1271 | string.Format("[LLUDPSERVER]: OutgoingPacketHandler iteration for {0} threw ", client.Name), ex); |
1221 | } | 1272 | } |
1222 | } | 1273 | } |
1223 | 1274 | ||
@@ -1243,6 +1294,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP | |||
1243 | { | 1294 | { |
1244 | nticks++; | 1295 | nticks++; |
1245 | watch1.Start(); | 1296 | watch1.Start(); |
1297 | m_currentOutgoingClient = client; | ||
1298 | |||
1246 | try | 1299 | try |
1247 | { | 1300 | { |
1248 | if (client is LLClientView) | 1301 | if (client is LLClientView) |
@@ -1344,6 +1397,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP | |||
1344 | // Make sure this client is still alive | 1397 | // Make sure this client is still alive |
1345 | if (m_scene.TryGetClient(udpClient.AgentID, out client)) | 1398 | if (m_scene.TryGetClient(udpClient.AgentID, out client)) |
1346 | { | 1399 | { |
1400 | m_currentIncomingClient = client; | ||
1401 | |||
1347 | try | 1402 | try |
1348 | { | 1403 | { |
1349 | // Process this packet | 1404 | // Process this packet |
@@ -1361,6 +1416,10 @@ namespace OpenSim.Region.ClientStack.LindenUDP | |||
1361 | m_log.ErrorFormat("[LLUDPSERVER]: Client packet handler for {0} for packet {1} threw an exception", udpClient.AgentID, packet.Type); | 1416 | m_log.ErrorFormat("[LLUDPSERVER]: Client packet handler for {0} for packet {1} threw an exception", udpClient.AgentID, packet.Type); |
1362 | m_log.Error(e.Message, e); | 1417 | m_log.Error(e.Message, e); |
1363 | } | 1418 | } |
1419 | finally | ||
1420 | { | ||
1421 | m_currentIncomingClient = null; | ||
1422 | } | ||
1364 | } | 1423 | } |
1365 | else | 1424 | else |
1366 | { | 1425 | { |