From 58d81d1a6d4dd09a8f19d4c42ab45caae7db030d Mon Sep 17 00:00:00 2001 From: dvs1 Date: Sun, 19 Jan 2025 21:41:47 +1000 Subject: Use the wiki print button, filter the wiki HTML early, and use cmark-gfm. --- SuckItFos | 43 +++++++++++++++++++++++++------------- SuckItPm | 58 +++++++++++++++++++++++++++------------------------ TODO.md | 1 - notYetAnotherWiki.lua | 54 ++++++++++++++++++++++++++++------------------- 4 files changed, 92 insertions(+), 64 deletions(-) diff --git a/SuckItFos b/SuckItFos index 54fc376..b59f6b0 100755 --- a/SuckItFos +++ b/SuckItFos @@ -21,16 +21,34 @@ do mkdir -p Foswiki/${base}/`dirname ${file}` mkdir -p combined/$base mkdir -p combined/${base}/`dirname ${file}` - echo "Converting ${URL}/${base}/${file} -> Foswiki/${base}/${file}.md" + echo "Converting ${URL}/${base}/${file}?cover=print -> Foswiki/${base}/${file}.md" # pandoc -f html -t markdown --self-contained ${URL}/${base}/${file} >Foswiki/${base}/${file}.md # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. - curl --silent --no-progress-meter ${URL}/${base}/${file} -o Foswiki/${base}/${file}.HTM + curl --silent --no-progress-meter ${URL}/${base}/${file}?cover=print -o Foswiki/${base}/${file}.HTM + + cp Foswiki/${base}/${file}.HTM Foswiki/${base}/${file}.HTM_ORIGINAL + csplit -ks Foswiki/${base}/${file}.HTM '%
%' '/
/' + if [ -f xx00 ]; then + rm Foswiki/${base}/${file}.HTM + mv xx00 Foswiki/${base}/${file}.HTM + fi + sed -i -E Foswiki/${base}/${file}.HTM \ + -e "s/rel='nofollow'//g" \ + -e 's/rel="nofollow"//g' \ + -e "s/target='_blank'//g" \ + -e "s/class='foswiki[[:alpha:]]*'//g" \ + -e 's/class="foswikiTopic"/class="FoswikiTopic"/g' \ + -e 's/class="foswiki[[:alpha:]]*"//g' \ + -e "s/style='.*;'//g" +# -e "s/style='background-color: #.*;'//g" \ +# -e "s/style='font-size: .*;'//g" + pandoc -f html -t commonmark_x --self-contained Foswiki//${base}/${file}.HTM >Foswiki/${base}/${file}.md - ln -frs Foswiki/${base}/${file}.md combined/${base}/${file}.md cp Foswiki/${base}/${file}.md Foswiki/${base}/${file}.md_ORIGINAL - csplit -ks Foswiki/${base}/${file}.md '%::: {.foswikiTopic}%' '/::: {.foswikiContentFooter}/' +# csplit -ks Foswiki/${base}/${file}.md '%::: {.foswikiTopic}%' '/::: {.foswikiContentFooter}/' + csplit -ks Foswiki/${base}/${file}.md '%::: {.FoswikiTopic}%' '/::: {.patternInfo}/' if [ -f xx00 ]; then rm Foswiki/${base}/${file}.md mv xx00 Foswiki/${base}/${file}.md @@ -40,21 +58,16 @@ do sed -i -E Foswiki/${base}/${file}.md \ -e 's/\$/\$dlr\$/g' \ -e 's/\{#.*\}//g' \ - -e 's/\{\.foswiki.*\}//g' \ - -e 's/\{\.foswiki.*//g' \ - -e 's/\{\.foswikiNewLink rel=“nofollow”\}//g' \ - -e 's/\{\.foswikiNewLink$//g' \ - -e 's/^\.foswiki.*\}//g' \ -e 's/\{\.pattern.*\}//g' \ -e 's/\{\.pattern.*//g' \ - -e 's/\{rel="nofollow"\}//g' \ - -e 's/^rel="nofollow"\}//g' \ - -e 's/rel=“nofollow”\}$//g' \ + -e '/^/d' \ -e '/^:::/d' - echo -e "****\n[Original page](${URL}/${base}/${file}) where you can edit it." >> Foswiki/${base}/${file}.md + echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> Foswiki/${base}/${file}.md -# pandoc -t html -f commonmark_x --self-contained Foswiki/${base}/${file}.md > Foswiki//${base}/${file}.htm - cmark-gfm -t html -e footnotes -e table -e strikethrough Foswiki/${base}/${file}.md > Foswiki//${base}/${file}.body +# pandoc -t html -f commonmark_x --self-contained Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.htm +# cmark-gfm -t html -e footnotes -e table -e strikethrough Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.body +# ln -frs Foswiki/${base}/${file}.body combined/${base}/${file}.body + ln -frs Foswiki/${base}/${file}.md combined/${base}/${file}.md done notYetAnotherWiki.lua diff --git a/SuckItPm b/SuckItPm index 64591c3..156ee9f 100755 --- a/SuckItPm +++ b/SuckItPm @@ -19,30 +19,44 @@ do file=`echo "${line}" | cut -d '.' -f 2` mkdir -p PmWiki/$base mkdir -p combined/$base - echo "Converting ${URL}/?n=${base}.${file} -> PmWiki/${base}/${file}.md" + echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. # curl --no-progress-meter ${URL}/?n=${base}.${file} -o PmWiki/${base}/${file}.HTM - curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD -# pandoc -f html -t commonmark_x --self-contained PmWiki//${base}/${file}.HTM >PmWiki/${base}/${file}.md - pandoc -f markdown -t commonmark_x --self-contained PmWiki//${base}/${file}.MD >PmWiki/${base}/${file}.md - ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md - cp PmWiki/${base}/${file}.md PmWiki/${base}/${file}.md_ORIGINAL - - csplit -ks PmWiki/${base}/${file}.md '%trailstart%' '/trailend/' -# csplit -ks PmWiki/${base}/${file}.md '%::: {#wikitext}%' '/::: {#wikifoot-links .footnav}/' +# curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD + curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM + cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL + csplit -ks PmWiki/${base}/${file}.HTM '%%' '//' if [ -f xx00 ]; then - rm PmWiki/${base}/${file}.md - mv xx00 PmWiki/${base}/${file}.md + rm PmWiki/${base}/${file}.HTM + mv xx00 PmWiki/${base}/${file}.HTM fi + sed -i -E PmWiki/${base}/${file}.HTM \ + -e "s/rel='nofollow'//g" \ + -e "s/target='_blank'//g" \ + -e "s/class='createlink'//g" \ + -e "s/class='createlinktext'//g" \ + -e "s/class='escaped'//g" \ + -e "s/class='diffmarkup'//g" \ + -e "s/class='selflink'//g" \ + -e "s/class='urllink'//g" \ + -e "s/class='vspace'//g" \ + -e "s/class='wikilink'//g" \ + -e "s/style='.*;'//g" +# -e "s/class='.*'//g" \ +# -e "s/style='background-color: #.*;'//g" \ +# -e "s/style='font-size: .*;'//g" + + pandoc -f html -t commonmark_x --self-contained PmWiki//${base}/${file}.HTM >PmWiki/${base}/${file}.md +# pandoc -f markdown -t commonmark_x --self-contained PmWiki//${base}/${file}.MD >PmWiki/${base}/${file}.md + cp PmWiki/${base}/${file}.md PmWiki/${base}/${file}.md_ORIGINAL # Attempt to clean things up, badly. sed -i -E PmWiki/${base}/${file}.md \ -e 's/\$/\$dlr\$/g' \ -e 's/\{#.*\}//g' \ -e '/^:::/d' \ - -e '/\{\.wikilink\}/d' \ -e '/\[Site$/d' \ -e '/^Page last modified on /d' \ -e '/^\[\]/d' \ @@ -52,22 +66,12 @@ do -e "s/^\`\`\`//g" \ -e "s/\`\{=html\}//g" -# -e 's/\{rel=".*\}//g' \ -# -e 's/\{rel="nofollow"$//g' \ -# -e 's/^rel="nofollow"\}//g' \ -# -e 's/^target="_blank"\}//g' \ -# -e 's/\{\.createlinktext.*\}//g' \ -# -e 's/\{\.createlinktext$//g' \ -# -e 's/\{\.createlink.*\}//g' \ -# -e 's/\{\.createlink$//g' \ -# -e 's/\{\.urllink.*\}//g' \ -# -e 's/\{\.urllink$//g' - -#echo "

Original page where you can edit it.

" >> PmWiki/${base}/${file}.md - echo -e "****\n[Original page](${URL}/${base}/${file}) where you can edit it." >> PmWiki/${base}/${file}.md + echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> PmWiki/${base}/${file}.md -# pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki//${base}/${file}.htm - cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki//${base}/${file}.body +# pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm +# cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body +# ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body + ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md done notYetAnotherWiki.lua diff --git a/TODO.md b/TODO.md index 773c3c9..2062323 100644 --- a/TODO.md +++ b/TODO.md @@ -1,7 +1,6 @@ # TODO ## Do these -See if any wiki specific MarkDown outputting modules can help. Already installed one for PmWiki, which works now. See if I can find something similar for Foswiki. Check the timestamps on the files, only update if source is newer than destination. Meh, it's already 600 times faster than the pandoc version. - One quirk to watch for is if a URL path changes, the docs that have that URL need to be redone. diff --git a/notYetAnotherWiki.lua b/notYetAnotherWiki.lua index d592d06..c7b205d 100755 --- a/notYetAnotherWiki.lua +++ b/notYetAnotherWiki.lua @@ -24,11 +24,16 @@ local Sites, Files, Subs = {}, {}, {} -- Useful functions, part 0. -- A simple table.subtable = subtable wont work, you end up with a reference so that changes to the later get applaid to the former. -local copyTable = function(t, strip) +local derefiTable = function(t, strip) local argh = {} for l, y in ipairs(t) do if (l ~= y.name) and strip then table.insert(argh, y) end end return argh end +local derefTable = function(t, strip) + local argh = {} + for l, y in pairs(t) do argh[l] = y end + return argh +end -- String together the bits array into a path string. @@ -79,7 +84,7 @@ for name, file in pairs(Files) do path = path .. d if nil == Subs[path] then Subs[path] = {files = {}, subs = {}} end if i < ln then Subs[path].subs[bits[i + 1]] = bits[i + 1] end - Subs[path].bits = copyTable(bits, true) + Subs[path].bits = derefiTable(bits, true) if i < ln then table.remove(Subs[path].bits, #bits) end end @@ -97,10 +102,14 @@ for name, file in pairs(Files) do Files[name] = nil else -- Ordinary md file, stash it's metadata and parsed body. - h = io.open(name .. '.body', 'r') + -- I need cmark-gfm, coz lcmark doesn't support tables and stuff, it only does basic cmark. + -- Have to strip out the metadata first, coz cmark-gfm doesn't grok that. + h = io.popen('cp "' .. name .. '.md" "' .. name .. '02" ; csplit -ksz -f "' .. name .. '" "' .. name .. '.md" "/^---$/+1" "{1}" 2>/dev/null ; rm "' .. name .. '00" ; rm "' .. name .. '01" 2>/dev/null') + if nil ~= h then h:close() end + h = io.popen('cmark-gfm -t html -e footnotes -e table -e strikethrough -e autolink -e tagfilter -e tasklist "' .. name .. '02" ; rm "' .. name .. '02"') if nil ~= h then body = h:read('*a') ; h:close() end - file.metadata = metadata - file.body = body + Files[name].metadata = metadata + Files[name].body = body table.insert(Subs[path].files, bit) end end @@ -113,23 +122,26 @@ end -- NOTE - only looking for the .md files we scanned for before, any stray HTML, html, HTM, and htm files will get ignored. local whichPage = function(f) local fl = '' - if (nil ~= Subs[f]) and (nil ~= Subs[f].files) then - if 1 == #(Subs[f].files) then fl = Subs[f].files[1] else - -- Standard files to search for. - for i, v in ipairs{'about', 'readme', 'index', 'homepage', 'mainpage', 'webhome'} do - for j, w in ipairs(Subs[f].files) do - if v == string.lower(w) then - fl = w - break + if nil ~= Subs[f] then + if nil ~= Subs[f].whichPage then return Subs[f].whichPage end + if nil ~= Subs[f].files then + if 1 == #(Subs[f].files) then fl = Subs[f].files[1] else + -- Standard files to search for. + for i, v in ipairs{'about', 'readme', 'index', 'homepage', 'mainpage', 'webhome'} do + for j, w in ipairs(Subs[f].files) do + if v == string.lower(w) then + fl = w + break + end end + if '' ~= fl then break end end - if '' ~= fl then break end + -- If nothing else, just grab the first one. + if ('' == fl) and (nil ~= Subs[f].files[1]) then fl = Subs[f].files[1] end end - -- If nothing else, just grab the first one. - if ('' == fl) and (nil ~= Subs[f].files[1]) then fl = Subs[f].files[1] end end end - if '' ~= fl then fl = fl .. '.HTML' end + if '' ~= fl then fl = fl .. '.HTML' ; Subs[f].whichPage = fl end return fl end @@ -167,7 +179,7 @@ end -- Loop through the files we found and actually create their HTML files. for name, file in pairs(Files) do local path, result = '', '' - local body, metadata = Files[name].body, Files[name].metadata + local body, metadata = Files[name].body, derefTable(Files[name].metadata, true) local bits, bit = Files[name].bits, Files[name].bit local ln = #bits @@ -237,7 +249,6 @@ for name, file in pairs(Files) do -- Figure out this pages menu links. metadata.menu = '' - if nil == metadata.title then metadata.title = metadata.pagetitle end if nil == metadata.title then metadata.title = bit end if nil ~= Subs[path].files then table.sort(Subs[path].files, function(a, b) return (string.lower(a) < string.lower(b)) end) end for i, f in ipairs(Subs[path].files) do @@ -298,6 +309,7 @@ for name, file in pairs(Files) do result = lcmark.apply_template(template, metadata) end else + print('No template for ' .. name) result = body end @@ -312,8 +324,8 @@ for name, file in pairs(Files) do end end - else - print('') +-- else +-- print('') end end -- cgit v1.1