aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorJustin Clark-Casey (justincc)2012-06-07 02:44:13 +0100
committerJustin Clark-Casey (justincc)2012-06-07 02:44:13 +0100
commit98b46d48fefc7801b7c9e496000e9329f4266e5e (patch)
treee0236812dce3255874a4521993040c1753bd339c
parentminor: Change log messages on Warp3DImageModule to show they are from this mo... (diff)
downloadopensim-SC_OLD-98b46d48fefc7801b7c9e496000e9329f4266e5e.zip
opensim-SC_OLD-98b46d48fefc7801b7c9e496000e9329f4266e5e.tar.gz
opensim-SC_OLD-98b46d48fefc7801b7c9e496000e9329f4266e5e.tar.bz2
opensim-SC_OLD-98b46d48fefc7801b7c9e496000e9329f4266e5e.tar.xz
Allow the thread watchdog to accept an alarm method that is invoked if the timeout is breached.
This alarm can then invoke this to log extra information. This is used in LLUDPServer to show which client was being processed when incoming and outgoing udp watchdog alarms are triggered.
-rw-r--r--OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs2
-rw-r--r--OpenSim/Framework/Watchdog.cs40
-rw-r--r--OpenSim/Region/Application/OpenSim.cs10
-rw-r--r--OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs69
4 files changed, 96 insertions, 25 deletions
diff --git a/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs b/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs
index 0062d4e..f96fd1f 100644
--- a/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs
+++ b/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs
@@ -66,6 +66,7 @@ namespace OpenSim.Framework.Servers.HttpServer
66 ThreadPriority.Normal, 66 ThreadPriority.Normal,
67 false, 67 false,
68 true, 68 true,
69 null,
69 int.MaxValue); 70 int.MaxValue);
70 } 71 }
71 72
@@ -75,6 +76,7 @@ namespace OpenSim.Framework.Servers.HttpServer
75 ThreadPriority.Normal, 76 ThreadPriority.Normal,
76 false, 77 false,
77 true, 78 true,
79 null,
78 1000 * 60 * 10); 80 1000 * 60 * 10);
79 } 81 }
80 82
diff --git a/OpenSim/Framework/Watchdog.cs b/OpenSim/Framework/Watchdog.cs
index e93e50e..7552cd1 100644
--- a/OpenSim/Framework/Watchdog.cs
+++ b/OpenSim/Framework/Watchdog.cs
@@ -42,7 +42,7 @@ namespace OpenSim.Framework
42 const double WATCHDOG_INTERVAL_MS = 2500.0d; 42 const double WATCHDOG_INTERVAL_MS = 2500.0d;
43 43
44 /// <summary>Maximum timeout in milliseconds before a thread is considered dead</summary> 44 /// <summary>Maximum timeout in milliseconds before a thread is considered dead</summary>
45 const int WATCHDOG_TIMEOUT_MS = 5000; 45 public const int WATCHDOG_TIMEOUT_MS = 5000;
46 46
47 [System.Diagnostics.DebuggerDisplay("{Thread.Name}")] 47 [System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
48 public class ThreadWatchdogInfo 48 public class ThreadWatchdogInfo
@@ -58,7 +58,7 @@ namespace OpenSim.Framework
58 public int FirstTick { get; private set; } 58 public int FirstTick { get; private set; }
59 59
60 /// <summary> 60 /// <summary>
61 /// First time this heartbeat update was invoked 61 /// Last time this heartbeat update was invoked
62 /// </summary> 62 /// </summary>
63 public int LastTick { get; set; } 63 public int LastTick { get; set; }
64 64
@@ -77,6 +77,11 @@ namespace OpenSim.Framework
77 /// </summary> 77 /// </summary>
78 public bool AlarmIfTimeout { get; set; } 78 public bool AlarmIfTimeout { get; set; }
79 79
80 /// <summary>
81 /// Method execute if alarm goes off. If null then no alarm method is fired.
82 /// </summary>
83 public Func<string> AlarmMethod { get; set; }
84
80 public ThreadWatchdogInfo(Thread thread, int timeout) 85 public ThreadWatchdogInfo(Thread thread, int timeout)
81 { 86 {
82 Thread = thread; 87 Thread = thread;
@@ -87,16 +92,10 @@ namespace OpenSim.Framework
87 } 92 }
88 93
89 /// <summary> 94 /// <summary>
90 /// This event is called whenever a tracked thread is stopped or 95 /// This event is called whenever a tracked thread is
91 /// has not called UpdateThread() in time 96 /// stopped or has not called UpdateThread() in time<
92 /// </summary> 97 /// /summary>
93 /// <param name="thread">The thread that has been identified as dead</param> 98 public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout;
94 /// <param name="lastTick">The last time this thread called UpdateThread()</param>
95 public delegate void WatchdogTimeout(Thread thread, int lastTick);
96
97 /// <summary>This event is called whenever a tracked thread is
98 /// stopped or has not called UpdateThread() in time</summary>
99 public static event WatchdogTimeout OnWatchdogTimeout;
100 99
101 private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); 100 private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
102 private static Dictionary<int, ThreadWatchdogInfo> m_threads; 101 private static Dictionary<int, ThreadWatchdogInfo> m_threads;
@@ -123,7 +122,7 @@ namespace OpenSim.Framework
123 public static Thread StartThread( 122 public static Thread StartThread(
124 ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout) 123 ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout)
125 { 124 {
126 return StartThread(start, name, priority, isBackground, alarmIfTimeout, WATCHDOG_TIMEOUT_MS); 125 return StartThread(start, name, priority, isBackground, alarmIfTimeout, null, WATCHDOG_TIMEOUT_MS);
127 } 126 }
128 127
129 /// <summary> 128 /// <summary>
@@ -135,17 +134,24 @@ namespace OpenSim.Framework
135 /// <param name="isBackground">True to run this thread as a background 134 /// <param name="isBackground">True to run this thread as a background
136 /// thread, otherwise false</param> 135 /// thread, otherwise false</param>
137 /// <param name="alarmIfTimeout">Trigger an alarm function is we have timed out</param> 136 /// <param name="alarmIfTimeout">Trigger an alarm function is we have timed out</param>
137 /// <param name="alarmMethod">
138 /// Alarm method to call if alarmIfTimeout is true and there is a timeout.
139 /// Normally, this will just return some useful debugging information.
140 /// </param>
138 /// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param> 141 /// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param>
139 /// <returns>The newly created Thread object</returns> 142 /// <returns>The newly created Thread object</returns>
140 public static Thread StartThread( 143 public static Thread StartThread(
141 ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout, int timeout) 144 ThreadStart start, string name, ThreadPriority priority, bool isBackground,
145 bool alarmIfTimeout, Func<string> alarmMethod, int timeout)
142 { 146 {
143 Thread thread = new Thread(start); 147 Thread thread = new Thread(start);
144 thread.Name = name; 148 thread.Name = name;
145 thread.Priority = priority; 149 thread.Priority = priority;
146 thread.IsBackground = isBackground; 150 thread.IsBackground = isBackground;
147 151
148 ThreadWatchdogInfo twi = new ThreadWatchdogInfo(thread, timeout) { AlarmIfTimeout = alarmIfTimeout }; 152 ThreadWatchdogInfo twi
153 = new ThreadWatchdogInfo(thread, timeout)
154 { AlarmIfTimeout = alarmIfTimeout, AlarmMethod = alarmMethod };
149 155
150 m_log.DebugFormat( 156 m_log.DebugFormat(
151 "[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId); 157 "[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId);
@@ -258,7 +264,7 @@ namespace OpenSim.Framework
258 /// <param name="e"></param> 264 /// <param name="e"></param>
259 private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e) 265 private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
260 { 266 {
261 WatchdogTimeout callback = OnWatchdogTimeout; 267 Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout;
262 268
263 if (callback != null) 269 if (callback != null)
264 { 270 {
@@ -296,7 +302,7 @@ namespace OpenSim.Framework
296 302
297 if (callbackInfos != null) 303 if (callbackInfos != null)
298 foreach (ThreadWatchdogInfo callbackInfo in callbackInfos) 304 foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
299 callback(callbackInfo.Thread, callbackInfo.LastTick); 305 callback(callbackInfo);
300 } 306 }
301 307
302 m_watchdogTimer.Start(); 308 m_watchdogTimer.Start();
diff --git a/OpenSim/Region/Application/OpenSim.cs b/OpenSim/Region/Application/OpenSim.cs
index 76ac827..caba236 100644
--- a/OpenSim/Region/Application/OpenSim.cs
+++ b/OpenSim/Region/Application/OpenSim.cs
@@ -438,12 +438,16 @@ namespace OpenSim
438 } 438 }
439 } 439 }
440 440
441 private void WatchdogTimeoutHandler(System.Threading.Thread thread, int lastTick) 441 private void WatchdogTimeoutHandler(Watchdog.ThreadWatchdogInfo twi)
442 { 442 {
443 int now = Environment.TickCount & Int32.MaxValue; 443 int now = Environment.TickCount & Int32.MaxValue;
444 444
445 m_log.ErrorFormat("[WATCHDOG]: Timeout detected for thread \"{0}\". ThreadState={1}. Last tick was {2}ms ago", 445 m_log.ErrorFormat(
446 thread.Name, thread.ThreadState, now - lastTick); 446 "[WATCHDOG]: Timeout detected for thread \"{0}\". ThreadState={1}. Last tick was {2}ms ago. {3}",
447 twi.Thread.Name,
448 twi.Thread.ThreadState,
449 now - twi.LastTick,
450 twi.AlarmMethod != null ? string.Format("Data: {0}", twi.AlarmMethod()) : "");
447 } 451 }
448 452
449 #region Console Commands 453 #region Console Commands
diff --git a/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs b/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs
index 32ba590..e37adb8 100644
--- a/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs
+++ b/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs
@@ -163,6 +163,16 @@ namespace OpenSim.Region.ClientStack.LindenUDP
163 163
164 private int m_malformedCount = 0; // Guard against a spamming attack 164 private int m_malformedCount = 0; // Guard against a spamming attack
165 165
166 /// <summary>
167 /// Record current outgoing client for monitoring purposes.
168 /// </summary>
169 private IClientAPI m_currentOutgoingClient;
170
171 /// <summary>
172 /// Recording current incoming client for monitoring purposes.
173 /// </summary>
174 private IClientAPI m_currentIncomingClient;
175
166 public LLUDPServer(IPAddress listenIP, ref uint port, int proxyPortOffsetParm, bool allow_alternate_port, IConfigSource configSource, AgentCircuitManager circuitManager) 176 public LLUDPServer(IPAddress listenIP, ref uint port, int proxyPortOffsetParm, bool allow_alternate_port, IConfigSource configSource, AgentCircuitManager circuitManager)
167 : base(listenIP, (int)port) 177 : base(listenIP, (int)port)
168 { 178 {
@@ -244,19 +254,56 @@ namespace OpenSim.Region.ClientStack.LindenUDP
244 if (m_scene == null) 254 if (m_scene == null)
245 throw new InvalidOperationException("[LLUDPSERVER]: Cannot LLUDPServer.Start() without an IScene reference"); 255 throw new InvalidOperationException("[LLUDPSERVER]: Cannot LLUDPServer.Start() without an IScene reference");
246 256
247 m_log.Info("[LLUDPSERVER]: Starting the LLUDP server in " + (m_asyncPacketHandling ? "asynchronous" : "synchronous") + " mode"); 257 m_log.InfoFormat(
258 "[LLUDPSERVER]: Starting the LLUDP server in {0} mode",
259 m_asyncPacketHandling ? "asynchronous" : "synchronous");
248 260
249 base.Start(m_recvBufferSize, m_asyncPacketHandling); 261 base.Start(m_recvBufferSize, m_asyncPacketHandling);
250 262
251 // Start the packet processing threads 263 // Start the packet processing threads
252 Watchdog.StartThread( 264 Watchdog.StartThread(
253 IncomingPacketHandler, "Incoming Packets (" + m_scene.RegionInfo.RegionName + ")", ThreadPriority.Normal, false, true); 265 IncomingPacketHandler,
266 string.Format("Incoming Packets ({0})", m_scene.RegionInfo.RegionName),
267 ThreadPriority.Normal,
268 false,
269 true,
270 GetWatchdogIncomingAlarmData,
271 Watchdog.WATCHDOG_TIMEOUT_MS);
272
254 Watchdog.StartThread( 273 Watchdog.StartThread(
255 OutgoingPacketHandler, "Outgoing Packets (" + m_scene.RegionInfo.RegionName + ")", ThreadPriority.Normal, false, true); 274 OutgoingPacketHandler,
275 string.Format("Outgoing Packets ({0})", m_scene.RegionInfo.RegionName),
276 ThreadPriority.Normal,
277 false,
278 true,
279 GetWatchdogOutgoingAlarmData,
280 Watchdog.WATCHDOG_TIMEOUT_MS);
256 281
257 m_elapsedMSSinceLastStatReport = Environment.TickCount; 282 m_elapsedMSSinceLastStatReport = Environment.TickCount;
258 } 283 }
259 284
285 /// <summary>
286 /// If the outgoing UDP thread times out, then return client that was being processed to help with debugging.
287 /// </summary>
288 /// <returns></returns>
289 private string GetWatchdogIncomingAlarmData()
290 {
291 return string.Format(
292 "Client is {0}",
293 m_currentIncomingClient != null ? m_currentIncomingClient.Name : "none");
294 }
295
296 /// <summary>
297 /// If the outgoing UDP thread times out, then return client that was being processed to help with debugging.
298 /// </summary>
299 /// <returns></returns>
300 private string GetWatchdogOutgoingAlarmData()
301 {
302 return string.Format(
303 "Client is {0}",
304 m_currentOutgoingClient != null ? m_currentOutgoingClient.Name : "none");
305 }
306
260 public new void Stop() 307 public new void Stop()
261 { 308 {
262 m_log.Info("[LLUDPSERVER]: Shutting down the LLUDP server for " + m_scene.RegionInfo.RegionName); 309 m_log.Info("[LLUDPSERVER]: Shutting down the LLUDP server for " + m_scene.RegionInfo.RegionName);
@@ -1173,6 +1220,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
1173 // client. m_packetSent will be set to true if a packet is sent 1220 // client. m_packetSent will be set to true if a packet is sent
1174 m_scene.ForEachClient(clientPacketHandler); 1221 m_scene.ForEachClient(clientPacketHandler);
1175 1222
1223 m_currentOutgoingClient = null;
1224
1176 // If nothing was sent, sleep for the minimum amount of time before a 1225 // If nothing was sent, sleep for the minimum amount of time before a
1177 // token bucket could get more tokens 1226 // token bucket could get more tokens
1178 if (!m_packetSent) 1227 if (!m_packetSent)
@@ -1191,6 +1240,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
1191 1240
1192 private void ClientOutgoingPacketHandler(IClientAPI client) 1241 private void ClientOutgoingPacketHandler(IClientAPI client)
1193 { 1242 {
1243 m_currentOutgoingClient = client;
1244
1194 try 1245 try
1195 { 1246 {
1196 if (client is LLClientView) 1247 if (client is LLClientView)
@@ -1216,8 +1267,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
1216 } 1267 }
1217 catch (Exception ex) 1268 catch (Exception ex)
1218 { 1269 {
1219 m_log.Error("[LLUDPSERVER]: OutgoingPacketHandler iteration for " + client.Name + 1270 m_log.Error(
1220 " threw an exception: " + ex.Message, ex); 1271 string.Format("[LLUDPSERVER]: OutgoingPacketHandler iteration for {0} threw ", client.Name), ex);
1221 } 1272 }
1222 } 1273 }
1223 1274
@@ -1243,6 +1294,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
1243 { 1294 {
1244 nticks++; 1295 nticks++;
1245 watch1.Start(); 1296 watch1.Start();
1297 m_currentOutgoingClient = client;
1298
1246 try 1299 try
1247 { 1300 {
1248 if (client is LLClientView) 1301 if (client is LLClientView)
@@ -1344,6 +1397,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
1344 // Make sure this client is still alive 1397 // Make sure this client is still alive
1345 if (m_scene.TryGetClient(udpClient.AgentID, out client)) 1398 if (m_scene.TryGetClient(udpClient.AgentID, out client))
1346 { 1399 {
1400 m_currentIncomingClient = client;
1401
1347 try 1402 try
1348 { 1403 {
1349 // Process this packet 1404 // Process this packet
@@ -1361,6 +1416,10 @@ namespace OpenSim.Region.ClientStack.LindenUDP
1361 m_log.ErrorFormat("[LLUDPSERVER]: Client packet handler for {0} for packet {1} threw an exception", udpClient.AgentID, packet.Type); 1416 m_log.ErrorFormat("[LLUDPSERVER]: Client packet handler for {0} for packet {1} threw an exception", udpClient.AgentID, packet.Type);
1362 m_log.Error(e.Message, e); 1417 m_log.Error(e.Message, e);
1363 } 1418 }
1419 finally
1420 {
1421 m_currentIncomingClient = null;
1422 }
1364 } 1423 }
1365 else 1424 else
1366 { 1425 {