From fab1f109d0564b44c950055d3233d6e7a1f3be81 Mon Sep 17 00:00:00 2001 From: onefang Date: Thu, 23 Feb 2023 02:26:36 +1000 Subject: Add yet another timeout watchdog. --- README.md | 1 + apt-panopticommon.lua | 35 ++++++++++++++++++++++++++++------- apt-panopticon.lua | 14 +++++++------- laggers | 3 +++ update_apt-panopticon | 12 ++++++++++-- 5 files changed, 49 insertions(+), 16 deletions(-) create mode 100755 laggers diff --git a/README.md b/README.md index 2edc3ca..7e5d6d2 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ installed - * lua-rrd * LuaSocket, on Debian based systems it'll be in the lua-socket package. * md5sum and sha256, on Debian based systems they'll be in the coreutils package. +* timeout, on Debian based systems it'll be in the coreutils package. * rrdtool * xz, on Debian based systems it'll be in the xz-utils package. diff --git a/apt-panopticommon.lua b/apt-panopticommon.lua index da1fc3d..a2def5a 100644 --- a/apt-panopticommon.lua +++ b/apt-panopticommon.lua @@ -134,7 +134,7 @@ APT.parseArgs = function(args) local arg = {} local sendArgs = "" -- A special test to disable IPv6 tests if IPv6 isn't available. - if 1 == APT.exe('ip -6 addr | grep inet6 | grep " global"'):Do().status then + if 1 == APT.exe('ip -6 addr | grep inet6 | grep " global"'):timeout():Do().status then table.insert(args, '--tests=-IPv6') end if 0 ~= #(args) then @@ -529,7 +529,7 @@ APT.tested = function(prot, test, host) end APT.exe = function(c) - local exe = {status = 0, result = '', log = true, cmd = c .. ' '} + local exe = {status = 0, result = '', log = true, cmd = c .. ' ', command = c} function exe:log() self.log = true @@ -543,6 +543,16 @@ APT.exe = function(c) end return self end + function exe:timeout(c) + -- timeout returns a status of - command status if --preserve-status; "128+9" (actually 137) if --kill-after ends up being done; 124 if it had to TERM; command status if all went well. + -- --kill-after means "send KILL after TERM fails. + if nil == c then + self.cmd = 'timeout --kill-after=10.0 --foreground -v 42.0s ' .. self.cmd + else + self.cmd = 'timeout --kill-after=10.0 --foreground -v ' .. c .. ' ' .. self.cmd + end + return self + end function exe:also(c) if nil == c then c = '' else c = ' ' .. c end self.cmd = self.cmd .. ';' .. c .. ' ' @@ -579,17 +589,28 @@ APT.exe = function(c) I'm getting 7168 or 0. No idea what the fuck that is. local ok, rslt, status = os.execute(s) ]] - local f = APT.readCmd(self.cmd .. ' ; echo "$?"', 'r') + local f = APT.readCmd(self.cmd, 'r') -- The last line will be the command's returned status, collect everything else in result. self.status = '' -- Otherwise the result starts with 0. + self.result = '\n' + for i,l in ipairs(f) do + self.result = self.result .. l .. "\n" + end + f = APT.readCmd('echo "$?"', 'r') for i,l in ipairs(f) do - self.result = self.result .. self.status .. "\n" - self.status = l + self.status = tonumber(l) + if (137 == self.status) or (124 == self.status) then + print("timeout killed " .. self.status .. ' ' .. self.command) + E("timeout killed " .. self.status .. ' ' .. self.command) + elseif (0 ~= self.status) then + print("status |" .. self.status .. '| ' .. self.command) + E("status |" .. self.status .. '| ' .. self.command) + end end - self.status = tonumber(self.status) return self end - function exe:fork() + function exe:fork(host) + if nil ~= host then self.cmd = self.cmd .. '; r=$?; if [ $r -ge 124 ]; then echo "$r ' .. host .. ' failed forked command ' .. string.gsub(self.cmd, '"', "'") .. '"; fi' end self.cmd = '{ ' .. self.cmd .. '; } &' if true == self.log then D(" forking -   " .. self.cmd .. "") end os.execute(self.cmd) diff --git a/apt-panopticon.lua b/apt-panopticon.lua index 8fd39e5..93243a4 100755 --- a/apt-panopticon.lua +++ b/apt-panopticon.lua @@ -330,7 +330,7 @@ checkHEAD = function (host, URL, r, retry, sanity) 'curl -I --retry 0 -s --path-as-is --connect-timeout ' .. APT.options.timeout.value .. ' --max-redirs 0 ' .. APT.IPv46 .. ' ' .. IP .. ' ' .. '-o /dev/null -D results/"HEADERS_' .. fname .. '" ' .. hdr .. ' -w "#%{http_code} %{ssl_verify_result} %{url_effective}\\n" ' .. PU.scheme .. '://' .. host .. PU.path .. ' >>results/"STATUS_' .. fname .. '"' - ):Nice():log():Do().status + ):timeout(APT.options.maxtime.value * 2.0):Nice():log():Do().status if 0 < r then APT.tested(PU.scheme, 'Redirects', host) else @@ -356,7 +356,7 @@ checkHEAD = function (host, URL, r, retry, sanity) if 0 ~= status then local msg = curlStatus[status] if nil == msg then msg = "UNKNOWN CURL STATUS CODE!" end - if (28 == status) or (7 == status) then + if (128+9 == status) or (124 == status) or (28 == status) or (7 == status) then T(spcd .. spcd .. "TIMEOUT " .. timeouts + 1 .. ", retry " .. retry + 1 .. ' ' .. APT.lnk(URL), PU.scheme, sanity, host) timeouts = timeouts + 1 else @@ -447,7 +447,7 @@ checkHEAD = function (host, URL, r, retry, sanity) local pth = path:match('^(.*/pool/).*$') if nil ~= pth then table.insert(APT.results[PU.scheme].redirects, pu.host .. "/" .. pth) else E(spcd .. spcd .. 'Odd redirect path ' .. path) end I(spcd .. spcd .. "Now checking redirected host " .. u .. '   for   ' .. APT.lnk(URL) .. arw .. APT.lnk(location), host) - APT.exe(downloadLock .. "REDIR-" .. check .. ".log.txt" .. " ./apt-panopticon.lua " .. extraArgs .. ' ' .. pu.host .. "/" .. path .. " " .. file):Nice():log():fork() + APT.exe(downloadLock .. "REDIR-" .. check .. ".log.txt" .. " ./apt-panopticon.lua " .. extraArgs .. ' ' .. pu.host .. "/" .. path .. " " .. file):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(pu.host) D(spcd .. 'logging to ' .. APT.logName(pu.host, nil, file)[2]) APT.tested(PU.scheme, 'Redirects', host) end @@ -534,7 +534,7 @@ checkHost = function (orig, host, path, ip, file) else if orig == host then I("Testing mirror " .. orig .. "" .. file) - APT.exe("./apt-panopticon.lua " .. sendArgs .. " -o " .. orig .. path .. " " .. file):Nice():log():fork() + APT.exe("./apt-panopticon.lua " .. sendArgs .. " -o " .. orig .. path .. " " .. file):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(orig) D('logging to ' .. APT.logName(ph.host, nil, file)[2]) else D("checkHost " .. orig .. arw .. host) end end @@ -618,7 +618,7 @@ local downloads = function(host, URL, meta, release, list) end end f:close() - APT.exe(cm):Nice():log():fork() + APT.exe(cm):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(host) D('logging to ' .. log .. ', with these files') end @@ -1129,9 +1129,9 @@ if 0 < #arg then APT.allpairs(ips, function(k, v) if v == "A" then - if APT.testing("IPv4") then APT.exe('./apt-panopticon.lua ' .. sendArgs .. ' -4 ' .. pu.host .. path .. ' ' .. k .. ' ' .. file):Nice():log():fork() end + if APT.testing("IPv4") then APT.exe('./apt-panopticon.lua ' .. sendArgs .. ' -4 ' .. pu.host .. path .. ' ' .. k .. ' ' .. file):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(pu.host) end elseif v == "AAAA" then - if APT.testing("IPv6") then APT.exe('./apt-panopticon.lua ' .. sendArgs .. ' -6 ' .. APT.IPv46 .. ' ' .. pu.host .. path .. ' ' .. k .. ' ' .. file):Nice():log():fork() end + if APT.testing("IPv6") then APT.exe('./apt-panopticon.lua ' .. sendArgs .. ' -6 ' .. APT.IPv46 .. ' ' .. pu.host .. path .. ' ' .. k .. ' ' .. file):timeout(APT.options.maxtime.value * 2.0):Nice():log():fork(pu.host) end end D('logging to ' .. APT.logName(pu.host, k, file)[2]) end diff --git a/laggers b/laggers new file mode 100755 index 0000000..c5f30ee --- /dev/null +++ b/laggers @@ -0,0 +1,3 @@ +#!/bin/bash +echo "apt-panopticon processes still running -" +ps ax -o pid,args --sort args | grep -E 'apt-panopticon\.lua | curl | dig ' | grep -v -E 'flock -n |grep -E |sh -c |timeout -k ' diff --git a/update_apt-panopticon b/update_apt-panopticon index 8edb43d..abbc154 100755 --- a/update_apt-panopticon +++ b/update_apt-panopticon @@ -14,18 +14,26 @@ fi # Check if the lock file still exists. if [ -f apt-panopticon.lock ] ; then # Check if it's still running. - ps ax -eo pid,args | grep "luajit ./apt-panopticon.lua" | grep -v "grep luajit ./apt-panopticon.lua" | while read line ; do touch apt-panopticon.running ; exit ; done + ps ax -eo pid,args | grep "apt-panopticon.lua" | grep -v "grep apt-panopticon.lua" | while read line ; do touch apt-panopticon.running ; exit ; done if [ -f apt-panopticon.running ] ; then echo "Previous apt-panopticon still running, exiting." + echo "Previous apt-panopticon still running, exiting." + ./laggers rm apt-panopticon.running exit 1 fi echo "Crashed apt-panopticon detected, removing stale lock file." + echo "Crashed apt-panopticon detected, removing stale lock file." + ./laggers rm apt-panopticon.lock fi rm ../results; ln -s apt-panopticon/results_old ../results -flock -n apt-panopticon.lock ./apt-panopticon.lua && rm apt-panopticon.lock +flock -n apt-panopticon.lock ionice -c3 nice -n 19 timeout --kill-after=20.0 --foreground -v 8.5m ./apt-panopticon.lua && rm apt-panopticon.lock +if [ -f apt-panopticon.lock ] ; then + echo "apt-panopticon timed out." + ./laggers +fi rm ../results; ln -s apt-panopticon/results ../results chown -R www-data:www-data * -- cgit v1.1