From 58d81d1a6d4dd09a8f19d4c42ab45caae7db030d Mon Sep 17 00:00:00 2001 From: dvs1 Date: Sun, 19 Jan 2025 21:41:47 +1000 Subject: Use the wiki print button, filter the wiki HTML early, and use cmark-gfm. --- SuckItPm | 58 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 27 deletions(-) (limited to 'SuckItPm') diff --git a/SuckItPm b/SuckItPm index 64591c3..156ee9f 100755 --- a/SuckItPm +++ b/SuckItPm @@ -19,30 +19,44 @@ do file=`echo "${line}" | cut -d '.' -f 2` mkdir -p PmWiki/$base mkdir -p combined/$base - echo "Converting ${URL}/?n=${base}.${file} -> PmWiki/${base}/${file}.md" + echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. # curl --no-progress-meter ${URL}/?n=${base}.${file} -o PmWiki/${base}/${file}.HTM - curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD -# pandoc -f html -t commonmark_x --self-contained PmWiki//${base}/${file}.HTM >PmWiki/${base}/${file}.md - pandoc -f markdown -t commonmark_x --self-contained PmWiki//${base}/${file}.MD >PmWiki/${base}/${file}.md - ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md - cp PmWiki/${base}/${file}.md PmWiki/${base}/${file}.md_ORIGINAL - - csplit -ks PmWiki/${base}/${file}.md '%trailstart%' '/trailend/' -# csplit -ks PmWiki/${base}/${file}.md '%::: {#wikitext}%' '/::: {#wikifoot-links .footnav}/' +# curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD + curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM + cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL + csplit -ks PmWiki/${base}/${file}.HTM '%%' '//' if [ -f xx00 ]; then - rm PmWiki/${base}/${file}.md - mv xx00 PmWiki/${base}/${file}.md + rm PmWiki/${base}/${file}.HTM + mv xx00 PmWiki/${base}/${file}.HTM fi + sed -i -E PmWiki/${base}/${file}.HTM \ + -e "s/rel='nofollow'//g" \ + -e "s/target='_blank'//g" \ + -e "s/class='createlink'//g" \ + -e "s/class='createlinktext'//g" \ + -e "s/class='escaped'//g" \ + -e "s/class='diffmarkup'//g" \ + -e "s/class='selflink'//g" \ + -e "s/class='urllink'//g" \ + -e "s/class='vspace'//g" \ + -e "s/class='wikilink'//g" \ + -e "s/style='.*;'//g" +# -e "s/class='.*'//g" \ +# -e "s/style='background-color: #.*;'//g" \ +# -e "s/style='font-size: .*;'//g" + + pandoc -f html -t commonmark_x --self-contained PmWiki//${base}/${file}.HTM >PmWiki/${base}/${file}.md +# pandoc -f markdown -t commonmark_x --self-contained PmWiki//${base}/${file}.MD >PmWiki/${base}/${file}.md + cp PmWiki/${base}/${file}.md PmWiki/${base}/${file}.md_ORIGINAL # Attempt to clean things up, badly. sed -i -E PmWiki/${base}/${file}.md \ -e 's/\$/\$dlr\$/g' \ -e 's/\{#.*\}//g' \ -e '/^:::/d' \ - -e '/\{\.wikilink\}/d' \ -e '/\[Site$/d' \ -e '/^Page last modified on /d' \ -e '/^\[\]/d' \ @@ -52,22 +66,12 @@ do -e "s/^\`\`\`//g" \ -e "s/\`\{=html\}//g" -# -e 's/\{rel=".*\}//g' \ -# -e 's/\{rel="nofollow"$//g' \ -# -e 's/^rel="nofollow"\}//g' \ -# -e 's/^target="_blank"\}//g' \ -# -e 's/\{\.createlinktext.*\}//g' \ -# -e 's/\{\.createlinktext$//g' \ -# -e 's/\{\.createlink.*\}//g' \ -# -e 's/\{\.createlink$//g' \ -# -e 's/\{\.urllink.*\}//g' \ -# -e 's/\{\.urllink$//g' - -#echo "

Original page where you can edit it.

" >> PmWiki/${base}/${file}.md - echo -e "****\n[Original page](${URL}/${base}/${file}) where you can edit it." >> PmWiki/${base}/${file}.md + echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> PmWiki/${base}/${file}.md -# pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki//${base}/${file}.htm - cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki//${base}/${file}.body +# pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm +# cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body +# ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body + ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md done notYetAnotherWiki.lua -- cgit v1.1