aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--TODO.md9
-rwxr-xr-xnotYetAnotherWiki.lua174
2 files changed, 105 insertions, 78 deletions
diff --git a/TODO.md b/TODO.md
index ea36e19..518cacc 100644
--- a/TODO.md
+++ b/TODO.md
@@ -4,15 +4,6 @@ Make it perphekd!
4 4
5## Do these 5## Do these
6 6
7Check the timestamps on the files, and only update if the source is newer than the destination. Meh, it's already 600 times faster than the pandoc version.
8
9- One quirk to watch for: if a URL path changes, the docs that contain that URL need to be redone.
10- pandoc is a lot slower though, so do this for sure when dealing with that.
11- When scraping the web sites, they tend to be dynamically generated with no useful timestamp on them.
12- The web site scrape happens locally anyway, I can compare source file timestamps.
13- + So check timestamps when "downloading" the original, and before running pandoc on the result. I think those are the most time-consuming steps.
14- + Since this only stops updates of existing files, URLs changing are not a problem.
15
16Add an Atom feed for a single page. Alas, cgit only seems to have an Atom feed for the whole repo, not for individual files. 7Add an Atom feed for a single page. Alas, cgit only seems to have an Atom feed for the whole repo, not for individual files.
17 8
18- However, once timestamps are sorted, I can use that code to generate (static?) RSS and ATOM feeds, and create page histories using diffs. 9- However, once timestamps are sorted, I can use that code to generate (static?) RSS and ATOM feeds, and create page histories using diffs.
diff --git a/notYetAnotherWiki.lua b/notYetAnotherWiki.lua
index 53e3682..1cea01b 100755
--- a/notYetAnotherWiki.lua
+++ b/notYetAnotherWiki.lua
@@ -253,27 +253,34 @@ end
253 253
254-- Look for copied pages from the other wikis. 254-- Look for copied pages from the other wikis.
255for l in io.popen('find -L ' .. Directory .. ' -name "*.HTM" -type f,l -printf "%P\n"'):lines() do 255for l in io.popen('find -L ' .. Directory .. ' -name "*.HTM" -type f,l -printf "%P\n"'):lines() do
256-- print('pandoc converting ' .. l .. ' -> ' .. string.sub(l, 1, -4) .. 'md') 256-- TODO - Only do this if .HTM is newer than .md, or .md doesn't exist.
257 -- Open the HTM files and do the initial cleanups, then pandoc them. 257
258 h = io.open(l, 'r') 258 local htime = io.popen("date -ur " .. l .. " +%s"):read('l')
259 if nil ~= h then 259 local mtime = io.popen("date -ur " .. string.sub(l, 1, -4) .. "md +%s 2>/dev/null"):read('l')
260 local body = h:read('*a') ; h:close() 260 if (nil == mtime) or (htime > mtime) then
261 if 'Foswiki' == string.sub(l, 1, 7) then 261 print('pandoc converting ' .. l .. ' -> ' .. string.sub(l, 1, -4) .. 'md')
262 -- Strip out the actual content. 262os.execute('cp ' .. l .. ' ' .. l .. '_ORIGINAL0')
263 local beg, en = RE.find(body, [['<div id="patternMainContents">']]) if nil ~= beg then body = string.sub(body, en + 1) end 263 -- Open the HTM files and do the initial cleanups, then pandoc them.
264 beg, en = RE.find(body, [['<div class="patternContent">']]) if nil ~= beg then body = string.sub(body, en + 1) end 264 h = io.open(l, 'r')
265 beg, en = RE.find(body, [['<div class="foswikiTopic">']]) if nil ~= beg then 265 if nil ~= h then
266 if ' -- ' == string.sub(body, en + 1, en + 4) then 266 local body = h:read('*a') ; h:close()
267 beg, en = RE.find(body, '[%nl]', en + 4) 267writeString(l .. '_ORIGINAL1', body)
268 body = string.sub(body, en + 1) 268 if 'Foswiki' == string.sub(l, 1, 7) then
269 -- Strip out the actual content.
270 local beg, en = RE.find(body, [['<div id="patternMainContents">']]) if nil ~= beg then body = string.sub(body, en + 1) end
271 beg, en = RE.find(body, [['<div class="patternContent">']]) if nil ~= beg then body = string.sub(body, en + 1) end
272 beg, en = RE.find(body, [['<div class="foswikiTopic">']]) if nil ~= beg then
273 if ' -- ' == string.sub(body, en + 1, en + 4) then
274 beg, en = RE.find(body, '[%nl]', en + 4)
275 body = string.sub(body, en + 1)
276 end
269 end 277 end
270 end 278 beg, en = RE.find(body, [['<div class="patternInfo">']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end
271 beg, en = RE.find(body, [['<div class="patternInfo">']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end 279-- beg, en = RE.find(body, [['<div class="foswikiForm foswikiFormStep">']]) if nil ~= beg then body = string.sub(body, 1, en + 1) end
272-- beg, en = RE.find(body, [['<div class="foswikiForm foswikiFormStep">']]) if nil ~= beg then body = string.sub(body, 1, en + 1) end 280 beg, en = RE.find(body, [['<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end
273 beg, en = RE.find(body, [['<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end 281 beg, en = RE.find(body, [['<div class="foswikiSearchResultsPager">']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end
274 beg, en = RE.find(body, [['<div class="foswikiSearchResultsPager">']]) if nil ~= beg then body = string.sub(body, 1, beg - 1) end 282 -- Some clean ups.
275 -- Some clean ups. 283 local result = RE.compile( [[{~
276 local result = RE.compile( [[{~
277 ( 284 (
278 {'class="foswikiCurrentTopicLink"'} -> blank / 285 {'class="foswikiCurrentTopicLink"'} -> blank /
279 {'class="foswikiNewLink"'} -> blank / 286 {'class="foswikiNewLink"'} -> blank /
@@ -287,41 +294,58 @@ for l in io.popen('find -L ' .. Directory .. ' -name "*.HTM" -type f,l -printf "
287-- {'style="' ([^"])+ '"'} -> blank / {"style='" ([^'])+ "'"} -> blank / 294-- {'style="' ([^"])+ '"'} -> blank / {"style='" ([^'])+ "'"} -> blank /
288 . 295 .
289 )* ~}]], { blank = function(a) return '' end } ):match(body) 296 )* ~}]], { blank = function(a) return '' end } ):match(body)
290 body = result 297 body = result
291-- body = RE.gsub(body, [=[{"<!-- ".*"-->"}]=], '') -- FIXME 298-- body = RE.gsub(body, [=[{"<!-- ".*"-->"}]=], '') -- FIXME
292 local here = 1 299 local here = 1
293 beg, en = RE.find(body, [['https://fos.wiki.devuan.org/']], here) 300 beg, en = RE.find(body, [['https://fos.wiki.devuan.org/']], here)
294 while nil ~= beg do 301 while nil ~= beg do
295 here = beg + 1 302 here = beg + 1
296 local beg0, en0 303 local beg0, en0
297 local url = nil 304 local url = nil
298 if '"' == string.sub(body, beg - 1, beg - 1) then 305 if '"' == string.sub(body, beg - 1, beg - 1) then
299 beg0, en0 = RE.find(body, [['"']], en) 306 beg0, en0 = RE.find(body, [['"']], en)
300 url = string.sub(body, en + 1, en0 - 1) 307 url = string.sub(body, en + 1, en0 - 1)
301 end 308 end
302 if "'" == string.sub(body, beg - 1, beg - 1) then 309 if "'" == string.sub(body, beg - 1, beg - 1) then
303 beg0, en0 = RE.find(body, [["'"]], en) 310 beg0, en0 = RE.find(body, [["'"]], en)
304 url = string.sub(body, en + 1, en0) 311 url = string.sub(body, en + 1, en0)
305 end 312 end
306 313
307 if nil ~= url then 314 if nil ~= url then
308 if ('pub/' == string.sub(url, 1, 4)) then 315 if ('pub/' == string.sub(url, 1, 4)) then
309-- FIXME? - evil hack? 316-- FIXME? - evil hack?
310 url = 'Foswiki/' .. url 317 url = 'Foswiki/' .. url
311 else 318--print('FOSWIKI HTM ' .. url)
312 url = nil 319 else
320 url = nil
321 end
313 end 322 end
323--print('HTM0 ' .. string.sub(body, beg, en + 84) .. ' \t\t')
324 beg, en, body, here = commonLinky(l, body, 'https://fos.wiki.devuan.org/', url, beg, en, beg0, en0, 1)
325--if nil ~= en then print('HTM1 ' .. string.sub(body, beg, en + 84) .. ' \t\t') end
326--[=[
327 if nil == url then
328 print('OOPS! unknown linky - @' .. l .. '\t\t\t' .. string.sub(body, beg - 9, en) .. ' ' .. string.sub(body, en + 1, en0))
329 else
330-- print(' linky - @' .. l .. '\t\t\t' .. string.sub(body, beg - 9, en) .. ' ' .. string.sub(body, en + 1, en0) .. ' -> ' .. url)
331 local md = readMdMd(url, {})
332-- if nil ~= md then
333 if nil ~= md.realURL then url = md.realURL end
334-- end
335 body = string.sub(body, 1, beg - 1) .. url .. string.sub(body, en0 + 1)
336 here = here + #url
337 end
338 beg, en = RE.find(body, [['https://fos.wiki.devuan.org/']], here)
339]=]
314 end 340 end
315 beg, en, body, here = commonLinky(l, body, 'https://fos.wiki.devuan.org/', url, beg, en, beg0, en0, 1)
316 end
317 341
318 writeString(l .. '_NEW', body) 342 writeString(l .. '_NEW', body)
319 elseif 'PmWiki' == string.sub(l, 1, 6) then 343 elseif 'PmWiki' == string.sub(l, 1, 6) then
320 local beg, en = RE.find(body, [['<!--PageText-->']]) if nil ~= beg then body = string.sub(body, en + 2) end 344 local beg, en = RE.find(body, [['<!--PageText-->']]) if nil ~= beg then body = string.sub(body, en + 2) end
321 beg, en = RE.find(body, [["div id='wikitext'>"]]) if nil ~= beg then body = string.sub(body, en + 2) end 345 beg, en = RE.find(body, [["div id='wikitext'>"]]) if nil ~= beg then body = string.sub(body, en + 2) end
322 beg, en = RE.find(body, [["<div id='printfoot'>"]]) if nil ~= beg then body = string.sub(body, 1, beg - (2 + 9)) end -- There's a </div> to get rid of to. 346 beg, en = RE.find(body, [["<div id='printfoot'>"]]) if nil ~= beg then body = string.sub(body, 1, beg - (2 + 9)) end -- There's a </div> to get rid of to.
323 beg, en = RE.find(body, [['<!--HTMLFooter-->']]) if nil ~= beg then body = string.sub(body, 1, beg - 2) end 347 beg, en = RE.find(body, [['<!--HTMLFooter-->']]) if nil ~= beg then body = string.sub(body, 1, beg - 2) end
324 local result = RE.compile( [[{~ 348 local result = RE.compile( [[{~
325 ( 349 (
326 {"class='categorylink'"} -> blank / 350 {"class='categorylink'"} -> blank /
327 {"class='createlink'"} -> blank / 351 {"class='createlink'"} -> blank /
@@ -341,31 +365,43 @@ for l in io.popen('find -L ' .. Directory .. ' -name "*.HTM" -type f,l -printf "
341 {"<span class='hlt " {([a-z])+} "'></span><pre" } -> "<pre class='%2'" / 365 {"<span class='hlt " {([a-z])+} "'></span><pre" } -> "<pre class='%2'" /
342 . 366 .
343 )* ~}]], { blank = function(a) return '' end } ):match(body) 367 )* ~}]], { blank = function(a) return '' end } ):match(body)
344 body = result 368 body = result
345 here = 1 369-- body = RE.gsub(body, [=["<a " {([^ >])+} " >"]=], "<a %1>")
346 beg, en = RE.find(body, [["'https://wiki.devuan.org/"]], here) 370-- DONE? - <span class='hlt html'></span><pre style='background-color: #cc00ff;' class='escaped'> ... lines of HTML code ... </pre>
347 while nil ~= beg do 371-- most of the time I'll see <pre class='escaped'>
348 here = beg + 1 372-- My own looking glass has several.
349 local beg0, en0 = RE.find(body, [["'"]], en) 373-- Foswiki <pre class='bash'>
374-- CommonMark->HTML ---lua <pre><code class="language-lua"> .............................. </code></pre>
375-- Seems to be the spec way of doing it.
376-- most of the time I'll see <pre><code>
377
378 here = 1
379 beg, en = RE.find(body, [["'https://wiki.devuan.org/"]], here)
380 while nil ~= beg do
381 here = beg + 1
382 local beg0, en0 = RE.find(body, [["'"]], en)
350-- FIXME? - This might be working around a bug elsewhere. 383-- FIXME? - This might be working around a bug elsewhere.
351 if "'" == string.sub(body, en0, en0) then en0 = en0 - 1 end 384 if "'" == string.sub(body, en0, en0) then en0 = en0 - 1 end
352 local url = string.sub(body, en + 1, en0) 385 local url = string.sub(body, en + 1, en0)
353 if '?n=' == string.sub(url, 1, 3) then 386 if '?n=' == string.sub(url, 1, 3) then
354 url = string.sub(url, 4):gsub('[%a]+%.([%a-]+)', '%1_pm.HTML') 387 url = string.sub(url, 4):gsub('[%a]+%.([%a-]+)', '%1_pm.HTML')
355 elseif ("'" == url) or ('uploads/' == string.sub(url, 1, 8)) then 388 elseif ("'" == url) or ('uploads/' == string.sub(url, 1, 8)) then
356-- FIXME - evil hack? Yep, evil hack, need to know the depth of the source, which isn't here. 389-- FIXME - evil hack? Yep, evil hack, need to know the depth of the source, which isn't here.
357 url = 'PmWiki/' .. url 390 url = 'PmWiki/' .. url
358 else 391 else
359 url = nil 392 url = nil
393 end
394--print('HTM0 ' .. string.sub(body, beg, en + 84) .. ' \t\t')
395 beg, en, body, here = commonLinky(l, body, "'https://wiki.devuan.org/", url, beg, en, beg0, en0, 0)
396--if nil ~= en then print('HTM1 ' .. string.sub(body, beg, en + 84) .. ' \t\t') end
360 end 397 end
361 beg, en, body, here = commonLinky(l, body, "'https://wiki.devuan.org/", url, beg, en, beg0, en0, 0)
362 end
363 398
364 writeString(l .. '_NEW', body) 399 writeString(l .. '_NEW', body)
400 end
365 end 401 end
366 end
367 402
368 ok, rslt, status = os.execute('pandoc --wrap=preserve -f html -t commonmark_x --self-contained ' .. l .. '_NEW' .. ' >' .. string.sub(l, 1, -4) .. 'md') 403 ok, rslt, status = os.execute('pandoc --wrap=preserve -f html -t commonmark_x --self-contained ' .. l .. '_NEW' .. ' >' .. string.sub(l, 1, -4) .. 'md')
404 end
369end 405end
370 406
371if '.' ~= Directory then 407if '.' ~= Directory then