From 107f93b621ac3829e7aae687ede72473b5dad071 Mon Sep 17 00:00:00 2001 From: dvs1 Date: Sun, 16 Mar 2025 17:59:41 +1000 Subject: Spped things up by not downloading or converting things that didn't change. --- notYetAnotherWiki.lua | 174 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 69 deletions(-) (limited to 'notYetAnotherWiki.lua') diff --git a/notYetAnotherWiki.lua b/notYetAnotherWiki.lua index 53e3682..1cea01b 100755 --- a/notYetAnotherWiki.lua +++ b/notYetAnotherWiki.lua @@ -253,27 +253,34 @@ end -- Look for copied pages from the other wikis. for l in io.popen('find -L ' .. Directory .. ' -name "*.HTM" -type f,l -printf "%P\n"'):lines() do --- print('pandoc converting ' .. l .. ' -> ' .. string.sub(l, 1, -4) .. 'md') - -- Open the HTM files and do the initial cleanups, then pandoc them. - h = io.open(l, 'r') - if nil ~= h then - local body = h:read('*a') ; h:close() - if 'Foswiki' == string.sub(l, 1, 7) then - -- Strip out the actual content. - local beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, en + 1) end - beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, en + 1) end - beg, en = RE.find(body, [['
']]) if nil ~= beg then - if ' -- ' == string.sub(body, en + 1, en + 4) then - beg, en = RE.find(body, '[%nl]', en + 4) - body = string.sub(body, en + 1) +-- TODO - Only do this if .HTM is newer than .md, or .md doesn't exist. + + local htime = io.popen("date -ur " .. l .. " +%s"):read('l') + local mtime = io.popen("date -ur " .. string.sub(l, 1, -4) .. "md +%s 2>/dev/null"):read('l') + if (nil == mtime) or (htime > mtime) then + print('pandoc converting ' .. l .. ' -> ' .. string.sub(l, 1, -4) .. 'md') +os.execute('cp ' .. l .. ' ' .. l .. '_ORIGINAL0') + -- Open the HTM files and do the initial cleanups, then pandoc them. + h = io.open(l, 'r') + if nil ~= h then + local body = h:read('*a') ; h:close() +writeString(l .. '_ORIGINAL1', body) + if 'Foswiki' == string.sub(l, 1, 7) then + -- Strip out the actual content. + local beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, en + 1) end + beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, en + 1) end + beg, en = RE.find(body, [['
']]) if nil ~= beg then + if ' -- ' == string.sub(body, en + 1, en + 4) then + beg, en = RE.find(body, '[%nl]', en + 4) + body = string.sub(body, en + 1) + end end - end - beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end --- beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, 1, en + 1) end - beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end - beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end - -- Some clean ups. - local result = RE.compile( [[{~ + beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end +-- beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, 1, en + 1) end + beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end + beg, en = RE.find(body, [['
']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end + -- Some clean ups. + local result = RE.compile( [[{~ ( {'class="foswikiCurrentTopicLink"'} -> blank / {'class="foswikiNewLink"'} -> blank / @@ -287,41 +294,58 @@ for l in io.popen('find -L ' .. Directory .. ' -name "*.HTM" -type f,l -printf " -- {'style="' ([^"])+ '"'} -> blank / {"style='" ([^'])+ "'"} -> blank / . )* ~}]], { blank = function(a) return '' end } ):match(body) - body = result --- body = RE.gsub(body, [=[{""}]=], '') -- FIXME - local here = 1 - beg, en = RE.find(body, [['https://fos.wiki.devuan.org/']], here) - while nil ~= beg do - here = beg + 1 - local beg0, en0 - local url = nil - if '"' == string.sub(body, beg - 1, beg - 1) then - beg0, en0 = RE.find(body, [['"']], en) - url = string.sub(body, en + 1, en0 - 1) - end - if "'" == string.sub(body, beg - 1, beg - 1) then - beg0, en0 = RE.find(body, [["'"]], en) - url = string.sub(body, en + 1, en0) - end + body = result +-- body = RE.gsub(body, [=[{""}]=], '') -- FIXME + local here = 1 + beg, en = RE.find(body, [['https://fos.wiki.devuan.org/']], here) + while nil ~= beg do + here = beg + 1 + local beg0, en0 + local url = nil + if '"' == string.sub(body, beg - 1, beg - 1) then + beg0, en0 = RE.find(body, [['"']], en) + url = string.sub(body, en + 1, en0 - 1) + end + if "'" == string.sub(body, beg - 1, beg - 1) then + beg0, en0 = RE.find(body, [["'"]], en) + url = string.sub(body, en + 1, en0) + end - if nil ~= url then - if ('pub/' == string.sub(url, 1, 4)) then + if nil ~= url then + if ('pub/' == string.sub(url, 1, 4)) then -- FIXME? - evil hack? - url = 'Foswiki/' .. url - else - url = nil + url = 'Foswiki/' .. url +--print('FOSWIKI HTM ' .. url) + else + url = nil + end end +--print('HTM0 ' .. string.sub(body, beg, en + 84) .. ' \t\t') + beg, en, body, here = commonLinky(l, body, 'https://fos.wiki.devuan.org/', url, beg, en, beg0, en0, 1) +--if nil ~= en then print('HTM1 ' .. string.sub(body, beg, en + 84) .. 
' \t\t') end +--[=[ + if nil == url then + print('OOPS! unknown linky - @' .. l .. '\t\t\t' .. string.sub(body, beg - 9, en) .. ' ' .. string.sub(body, en + 1, en0)) + else +-- print(' linky - @' .. l .. '\t\t\t' .. string.sub(body, beg - 9, en) .. ' ' .. string.sub(body, en + 1, en0) .. ' -> ' .. url) + local md = readMdMd(url, {}) +-- if nil ~= md then + if nil ~= md.realURL then url = md.realURL end +-- end + body = string.sub(body, 1, beg - 1) .. url .. string.sub(body, en0 + 1) + here = here + #url + end + beg, en = RE.find(body, [['https://fos.wiki.devuan.org/']], here) +]=] end - beg, en, body, here = commonLinky(l, body, 'https://fos.wiki.devuan.org/', url, beg, en, beg0, en0, 1) - end - writeString(l .. '_NEW', body) - elseif 'PmWiki' == string.sub(l, 1, 6) then - local beg, en = RE.find(body, [['']]) if nil ~= beg then body = string.sub(body, en + 2) end - beg, en = RE.find(body, [["div id='wikitext'>"]]) if nil ~= beg then body = string.sub(body, en + 2) end - beg, en = RE.find(body, [["
"]]) if nil ~= beg then body = string.sub(body, 1, beg - (2 + 9)) end -- There's a
to get rid of too. - beg, en = RE.find(body, [['']]) if nil ~= beg then body = string.sub(body, 1, beg - 2) end - local result = RE.compile( [[{~ + writeString(l .. '_NEW', body) + elseif 'PmWiki' == string.sub(l, 1, 6) then + local beg, en = RE.find(body, [['']]) if nil ~= beg then body = string.sub(body, en + 2) end + beg, en = RE.find(body, [["div id='wikitext'>"]]) if nil ~= beg then body = string.sub(body, en + 2) end + beg, en = RE.find(body, [["
"]]) if nil ~= beg then body = string.sub(body, 1, beg - (2 + 9)) end -- There's a
to get rid of too. + beg, en = RE.find(body, [['']]) if nil ~= beg then body = string.sub(body, 1, beg - 2) end + local result = RE.compile( [[{~ ( {"class='categorylink'"} -> blank / {"class='createlink'"} -> blank / @@ -341,31 +365,43 @@ for l in io.popen('find -L ' .. Directory .. ' -name "*.HTM" -type f,l -printf " {" "
])+} " >"]=], "")
+-- DONE? - 
   ...  lines of HTML code   ...   
+-- most of the time I'll see
+--		My own looking glass has several.
+--		Foswiki                     
+--		CommonMark->HTML  ---lua    
                        ..............................  
+-- Seems to be the spec way of doing it. +-- most of the time I'll see

+
+		here = 1
+		beg, en = RE.find(body, [["'https://wiki.devuan.org/"]], here)
+		while nil ~= beg do
+		    here = beg + 1
+		    local beg0, en0 = RE.find(body, [["'"]], en)
 -- FIXME? - This might be working around a bug elsewhere.
-		if "'" == string.sub(body, en0, en0) then en0 = en0 - 1 end
-		local url = string.sub(body, en + 1, en0)
-		if '?n=' == string.sub(url, 1, 3) then
-		    url = string.sub(url, 4):gsub('[%a]+%.([%a-]+)', '%1_pm.HTML')
-		elseif ("'" == url) or ('uploads/' == string.sub(url, 1, 8)) then
+		    if "'" == string.sub(body, en0, en0) then en0 = en0 - 1 end
+		    local url = string.sub(body, en + 1, en0)
+		    if '?n=' == string.sub(url, 1, 3) then
+			url = string.sub(url, 4):gsub('[%a]+%.([%a-]+)', '%1_pm.HTML')
+		    elseif ("'" == url) or ('uploads/' == string.sub(url, 1, 8)) then
 -- FIXME - evil hack?  Yep, evil hack, need to know the depth of the source, which isn't here.
-		    url = 'PmWiki/' .. url
-		else
-		    url = nil
+			url = 'PmWiki/' .. url
+		    else
+			url = nil
+		    end
+--print('HTM0 ' .. string.sub(body, beg, en + 84) .. ' \t\t')
+		    beg, en, body, here = commonLinky(l, body, "'https://wiki.devuan.org/", url, beg, en, beg0, en0, 0)
+--if nil ~= en then print('HTM1 ' .. string.sub(body, beg, en + 84) .. ' \t\t') end
 		end
-		beg, en, body, here = commonLinky(l, body, "'https://wiki.devuan.org/", url, beg, en, beg0, en0, 0)
-	    end
 
-	    writeString(l .. '_NEW', body)
+		writeString(l .. '_NEW', body)
+	    end
 	end
-    end
 
-    ok, rslt, status = os.execute('pandoc --wrap=preserve -f html -t commonmark_x --self-contained ' .. l .. '_NEW' .. ' >' .. string.sub(l, 1, -4) .. 'md')
+	ok, rslt, status = os.execute('pandoc --wrap=preserve -f html -t commonmark_x --self-contained ' .. l .. '_NEW' .. ' >' .. string.sub(l, 1, -4) .. 'md')
+    end
 end
 
 if '.' ~= Directory then
-- 
cgit v1.1