From fab1f109d0564b44c950055d3233d6e7a1f3be81 Mon Sep 17 00:00:00 2001
From: onefang
Date: Thu, 23 Feb 2023 02:26:36 +1000
Subject: Add yet another timeout watchdog.
---
README.md | 1 +
apt-panopticommon.lua | 35 ++++++++++++++++++++++++++++-------
apt-panopticon.lua | 14 +++++++-------
laggers | 3 +++
update_apt-panopticon | 12 ++++++++++--
5 files changed, 49 insertions(+), 16 deletions(-)
create mode 100755 laggers
diff --git a/README.md b/README.md
index 2edc3ca..7e5d6d2 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,7 @@ installed -
* lua-rrd
* LuaSocket, on Debian based systems it'll be in the lua-socket package.
* md5sum and sha256, on Debian based systems they'll be in the coreutils package.
+* timeout, on Debian based systems it'll be in the coreutils package.
* rrdtool
* xz, on Debian based systems it'll be in the xz-utils package.
diff --git a/apt-panopticommon.lua b/apt-panopticommon.lua
index da1fc3d..a2def5a 100644
--- a/apt-panopticommon.lua
+++ b/apt-panopticommon.lua
@@ -134,7 +134,7 @@ APT.parseArgs = function(args)
local arg = {}
local sendArgs = ""
-- A special test to disable IPv6 tests if IPv6 isn't available.
- if 1 == APT.exe('ip -6 addr | grep inet6 | grep " global"'):Do().status then
+ if 1 == APT.exe('ip -6 addr | grep inet6 | grep " global"'):timeout():Do().status then
table.insert(args, '--tests=-IPv6')
end
if 0 ~= #(args) then
@@ -529,7 +529,7 @@ APT.tested = function(prot, test, host)
end
APT.exe = function(c)
- local exe = {status = 0, result = '', log = true, cmd = c .. ' '}
+ local exe = {status = 0, result = '', log = true, cmd = c .. ' ', command = c}
function exe:log()
self.log = true
@@ -543,6 +543,16 @@ APT.exe = function(c)
end
return self
end
+ function exe:timeout(c)
+ -- Prefix the command with coreutils timeout and return self for chaining; c is the time limit handed to timeout (42.0s when nil).  timeout exits with 124 if it had to TERM the command, 137 (128+9) if --kill-after then had to KILL it, otherwise with the command's own status (--preserve-status, not used here, would return the command's status even after a timeout).
+ -- --kill-after means "send KILL if the command is still running that long after the TERM was sent".  NOTE(review): the prefix goes in front of the whole command string, so for a shell pipeline only the first command runs under timeout.
+ if nil == c then
+ self.cmd = 'timeout --kill-after=10.0 --foreground -v 42.0s ' .. self.cmd
+ else
+ self.cmd = 'timeout --kill-after=10.0 --foreground -v ' .. c .. ' ' .. self.cmd
+ end
+ return self
+ end
function exe:also(c)
if nil == c then c = '' else c = ' ' .. c end
self.cmd = self.cmd .. ';' .. c .. ' '
@@ -579,17 +589,28 @@ APT.exe = function(c)
I'm getting 7168 or 0. No idea what the fuck that is.
local ok, rslt, status = os.execute(s)
]]
- local f = APT.readCmd(self.cmd .. ' ; echo "$?"', 'r')
+ local f = APT.readCmd(self.cmd, 'r')
-- The last line will be the command's returned status, collect everything else in result.
self.status = '' -- Otherwise the result starts with 0.
+ self.result = '\n'
+ for i,l in ipairs(f) do
+ self.result = self.result .. l .. "\n"
+ end
+ f = APT.readCmd('echo "$?"', 'r')
for i,l in ipairs(f) do
- self.result = self.result .. self.status .. "\n"
- self.status = l
+ self.status = tonumber(l)
+ if (137 == self.status) or (124 == self.status) then
+ print("timeout killed " .. self.status .. ' ' .. self.command)
+ E("timeout killed " .. self.status .. ' ' .. self.command)
+ elseif (0 ~= self.status) then
+ print("status |" .. self.status .. '| ' .. self.command)
+ E("status |" .. self.status .. '| ' .. self.command)
+ end
end
- self.status = tonumber(self.status)
return self
end
- function exe:fork()
+ function exe:fork(host)
+ if nil ~= host then self.cmd = self.cmd .. '; r=$?; if [ $r -ge 124 ]; then echo "$r ' .. host .. ' failed forked command ' .. string.gsub(self.cmd, '"', "'") .. '"; fi' end
self.cmd = '{ ' .. self.cmd .. '; } &'
if true == self.log then D(" forking - " .. self.cmd .. "
") end
os.execute(self.cmd)
diff --git a/apt-panopticon.lua b/apt-panopticon.lua
index 8fd39e5..93243a4 100755
--- a/apt-panopticon.lua
+++ b/apt-panopticon.lua
@@ -330,7 +330,7 @@ checkHEAD = function (host, URL, r, retry, sanity)
'curl -I --retry 0 -s --path-as-is --connect-timeout ' .. APT.options.timeout.value .. ' --max-redirs 0 ' .. APT.IPv46 .. ' ' ..
IP .. ' ' .. '-o /dev/null -D results/"HEADERS_' .. fname .. '" ' ..
hdr .. ' -w "#%{http_code} %{ssl_verify_result} %{url_effective}\\n" ' .. PU.scheme .. '://' .. host .. PU.path .. ' >>results/"STATUS_' .. fname .. '"'
- ):Nice():log():Do().status
+ ):timeout(APT.options.maxtime.value * 2.0):Nice():log():Do().status
if 0 < r then
APT.tested(PU.scheme, 'Redirects', host)
else
@@ -356,7 +356,7 @@ checkHEAD = function (host, URL, r, retry, sanity)
if 0 ~= status then
local msg = curlStatus[status]
if nil == msg then msg = "UNKNOWN CURL STATUS CODE!" end
- if (28 == status) or (7 == status) then
+ if (128+9 == status) or (124 == status) or (28 == status) or (7 == status) then
T(spcd .. spcd .. "TIMEOUT " .. timeouts + 1 .. ", retry " .. retry + 1 .. ' ' .. APT.lnk(URL), PU.scheme, sanity, host)
timeouts = timeouts + 1
else
@@ -447,7 +447,7 @@ checkHEAD = function (host, URL, r, retry, sanity)
local pth = path:match('^(.*/pool/).*$')
if nil ~= pth then table.insert(APT.results[PU.scheme].redirects, pu.host .. "/" .. pth) else E(spcd .. spcd .. 'Odd redirect path ' .. path) end
I(spcd .. spcd .. "Now checking redirected host " .. u .. ' for ' .. APT.lnk(URL) .. arw .. APT.lnk(location), host)
- APT.exe(downloadLock .. "REDIR-" .. check .. ".log.txt" .. " ./apt-panopticon.lua " .. extraArgs .. ' ' .. pu.host .. "/" .. path .. " " .. file):Nice():log():fork()
+ APT.exe(downloadLock .. "REDIR-" .. check .. ".log.txt" .. " ./apt-panopticon.lua " .. extraArgs .. ' ' .. pu.host .. "/" .. path .. " " .. file):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(pu.host)
D(spcd .. 'logging to ' .. APT.logName(pu.host, nil, file)[2])
APT.tested(PU.scheme, 'Redirects', host)
end
@@ -534,7 +534,7 @@ checkHost = function (orig, host, path, ip, file)
else
if orig == host then
I("Testing mirror " .. orig .. "" .. file)
- APT.exe("./apt-panopticon.lua " .. sendArgs .. " -o " .. orig .. path .. " " .. file):Nice():log():fork()
+ APT.exe("./apt-panopticon.lua " .. sendArgs .. " -o " .. orig .. path .. " " .. file):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(orig)
D('logging to ' .. APT.logName(ph.host, nil, file)[2])
else D("checkHost " .. orig .. arw .. host) end
end
@@ -618,7 +618,7 @@ local downloads = function(host, URL, meta, release, list)
end
end
f:close()
- APT.exe(cm):Nice():log():fork()
+ APT.exe(cm):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(host)
D('logging to ' .. log .. ', with these files')
end
@@ -1129,9 +1129,9 @@ if 0 < #arg then
APT.allpairs(ips,
function(k, v)
if v == "A" then
- if APT.testing("IPv4") then APT.exe('./apt-panopticon.lua ' .. sendArgs .. ' -4 ' .. pu.host .. path .. ' ' .. k .. ' ' .. file):Nice():log():fork() end
+ if APT.testing("IPv4") then APT.exe('./apt-panopticon.lua ' .. sendArgs .. ' -4 ' .. pu.host .. path .. ' ' .. k .. ' ' .. file):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(pu.host) end
elseif v == "AAAA" then
- if APT.testing("IPv6") then APT.exe('./apt-panopticon.lua ' .. sendArgs .. ' -6 ' .. APT.IPv46 .. ' ' .. pu.host .. path .. ' ' .. k .. ' ' .. file):Nice():log():fork() end
+ if APT.testing("IPv6") then APT.exe('./apt-panopticon.lua ' .. sendArgs .. ' -6 ' .. APT.IPv46 .. ' ' .. pu.host .. path .. ' ' .. k .. ' ' .. file):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(pu.host) end
end
D('logging to ' .. APT.logName(pu.host, k, file)[2])
end
diff --git a/laggers b/laggers
new file mode 100755
index 0000000..c5f30ee
--- /dev/null
+++ b/laggers
@@ -0,0 +1,3 @@
+#!/bin/bash
+echo "apt-panopticon processes still running -"
+ps ax -o pid,args --sort args | grep -E 'apt-panopticon\.lua | curl | dig ' | grep -v -E 'flock -n |grep -E |sh -c |timeout --kill-after'  # List the real workers only; drop the flock / grep / sh -c / timeout wrappers (timeout is always started with --kill-after=, never -k).
diff --git a/update_apt-panopticon b/update_apt-panopticon
index 8edb43d..abbc154 100755
--- a/update_apt-panopticon
+++ b/update_apt-panopticon
@@ -14,18 +14,26 @@ fi
# Check if the lock file still exists.
if [ -f apt-panopticon.lock ] ; then
# Check if it's still running.
- ps ax -eo pid,args | grep "luajit ./apt-panopticon.lua" | grep -v "grep luajit ./apt-panopticon.lua" | while read line ; do touch apt-panopticon.running ; exit ; done
+ ps ax -eo pid,args | grep "apt-panopticon.lua" | grep -v "grep apt-panopticon.lua" | while read line ; do touch apt-panopticon.running ; exit ; done
if [ -f apt-panopticon.running ] ; then
echo "Previous apt-panopticon still running, exiting."
+ echo "Previous apt-panopticon still running, exiting."
+ ./laggers
rm apt-panopticon.running
exit 1
fi
echo "Crashed apt-panopticon detected, removing stale lock file."
+ echo "Crashed apt-panopticon detected, removing stale lock file."
+ ./laggers
rm apt-panopticon.lock
fi
rm ../results; ln -s apt-panopticon/results_old ../results
-flock -n apt-panopticon.lock ./apt-panopticon.lua && rm apt-panopticon.lock
+flock -n apt-panopticon.lock ionice -c3 nice -n 19 timeout --kill-after=20.0 --foreground -v 8.5m ./apt-panopticon.lua && rm apt-panopticon.lock
+if [ -f apt-panopticon.lock ] ; then
+ echo "apt-panopticon timed out."
+ ./laggers
+fi
rm ../results; ln -s apt-panopticon/results ../results
chown -R www-data:www-data *
--
cgit v1.1