From f34badc723a821d732ace78f098aabea0f395f07 Mon Sep 17 00:00:00 2001
From: dvs1
Date: Thu, 2 Jan 2025 10:02:27 +1000
Subject: Add the Suckit* scripts.

---
Review revision: find filters as arrays, quoted expansions, read -r,
download check.  The blob ids on the index lines are those of the
original commit and do not describe the revised file contents.

 SuckItFos | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 SuckItPm  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+)
 create mode 100755 SuckItFos
 create mode 100755 SuckItPm

diff --git a/SuckItFos b/SuckItFos
new file mode 100755
index 0000000..4f824be
--- /dev/null
+++ b/SuckItFos
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# SuckItFos - fetch every topic of the Foswiki site at ${URL}, convert it to
+# Markdown under ./Foswiki/, then run notYetAnotherWiki.lua.  The topic list
+# comes from the *.txt files of the local copy in ../Foswiki/data.
+
+URL="https://devuan.daffodil.linuxoz.net"
+
+# find(1) tests pruning the entries we never convert.  An array keeps each
+# token a separate argument without relying on unquoted word splitting.
+filter=(
+    -name _default -prune -o
+    -name _empty -prune -o
+    -name System -prune -o
+    -name Trash -prune -o
+    -name TWiki -prune -o
+)
+
+find ../Foswiki/data "${filter[@]}" \
+-name "*.txt" -type f,l -printf "%P\n" | while IFS= read -r line
+do
+    # "Web/Sub/Topic.txt" -> base="Web", file="Sub/Topic".
+    base="${line%%/*}"
+    file="${line#*/}"
+    file="${file%.txt}"
+    out="Foswiki/${base}/${file}"
+    # ${file} may contain further directories, so create the full parent path.
+    mkdir -p "${out%/*}"
+    echo "Converting ${URL}/${base}/${file} -> ${out}.md"
+#    pandoc -f html -t markdown --self-contained ${URL}/${base}/${file} >Foswiki/${base}/${file}.md
+    # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages.  curl to .HTM
+    # Doesn't help with redownloads, coz natch a dynamic site isn't cached.  But I can at least comment out the curl command during testing to save time.
+    # If the download fails, skip the page rather than convert a missing or stale .HTM.
+    curl --silent --no-progress-meter "${URL}/${base}/${file}" -o "${out}.HTM" ||
+        { echo "Download failed: ${URL}/${base}/${file}" >&2; continue; }
+    pandoc -f html -t commonmark_x --self-contained "${out}.HTM" >"${out}.md"
+    cp "${out}.md" "${out}.md_ORIGINAL"
+
+#    csplit -ks Foswiki/${base}/${file}.md '%::: foswikiTopic%' '/::: foswikiContentFooter/'
+#    if [ -f xx00 ]; then
+#        rm Foswiki/${base}/${file}.md
+#        mv xx00 Foswiki/${base}/${file}.md
+#    fi
+
+    # Attempt to clean things up, badly.
+    sed -i -E \
+        -e 's/\$/\$dlr\$/g' \
+        -e 's/\{#.*\}//g' \
+        -e 's/\{\.foswiki.*\}//g' \
+        -e 's/\{\.foswiki.*//g' \
+        -e 's/\{\.foswikiNewLink rel=“nofollow”\}//g' \
+        -e 's/\{\.foswikiNewLink$//g' \
+        -e 's/^\.foswiki.*\}//g' \
+        -e 's/\{\.pattern.*\}//g' \
+        -e 's/\{\.pattern.*//g' \
+        -e 's/\{rel="nofollow"\}//g' \
+        -e 's/^rel="nofollow"\}//g' \
+        -e 's/rel=“nofollow”\}$//g' \
+        -e '/^:::/d' \
+        -- "${out}.md"
+
+    # NOTE(review): the wording suggests "Original page" should link to
+    # ${URL}/${base}/${file}, but no link markup is visible - please confirm.
+    echo "
+Original page where you can edit it.
+" >> "${out}.md"
+done
+
+notYetAnotherWiki.lua
diff --git a/SuckItPm b/SuckItPm
new file mode 100755
index 0000000..6232cdc
--- /dev/null
+++ b/SuckItPm
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# SuckItPm - fetch every page of the PmWiki site at ${URL}, convert it to
+# Markdown under ./PmWiki/, then run notYetAnotherWiki.lua.  The page list
+# comes from the <base>.<file> names of the local copy in ../pmwiki/wiki.d.
+
+URL="https://wiki.devuan.org"
+
+# find(1) tests excluding names ending in ~ and the listed dot files.  In an
+# array the "*~" pattern stays quoted, so the shell cannot expand it first.
+filter=(
+    -not -name "*~" -a
+    -not -name ".flock" -a
+    -not -name ".htaccess" -a
+    -not -name ".lastmod" -a
+    -not -name ".pageindex" -a
+)
+
+find ../pmwiki/wiki.d "${filter[@]}" \
+-name "*.*" -type f,l -printf "%P\n" | while IFS= read -r line
+do
+    # "First.Second.Rest" -> base="First", file="Second" (second dot field only).
+    base="${line%%.*}"
+    file="${line#*.}"
+    file="${file%%.*}"
+    out="PmWiki/${base}/${file}"
+    mkdir -p "PmWiki/${base}"
+    echo "Converting ${URL}/?n=${base}.${file} -> ${out}.md"
+#    pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md
+    # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages.  curl to .HTM
+    # Doesn't help with redownloads, coz natch a dynamic site isn't cached.  But I can at least comment out the curl command during testing to save time.
+    # If the download fails, skip the page rather than convert a missing or stale .HTM.
+    curl --no-progress-meter "${URL}/?n=${base}.${file}" -o "${out}.HTM" ||
+        { echo "Download failed: ${URL}/?n=${base}.${file}" >&2; continue; }
+    pandoc -f html -t commonmark_x --self-contained "${out}.HTM" >"${out}.md"
+    cp "${out}.md" "${out}.md_ORIGINAL"
+
+#    csplit -ks PmWiki/${base}/${file}.md '/trailstart/' '/trailend/'
+    # Drop everything before the "::: {#wikitext}" line; csplit writes the rest to ./xx00.
+    csplit -ks "${out}.md" '%::: {#wikitext}%'
+    if [ -f xx00 ]; then
+        mv -f xx00 "${out}.md"
+    fi
+
+    # Attempt to clean things up, badly.
+    sed -i -E \
+        -e 's/\$/\$dlr\$/g' \
+        -e 's/\{#.*\}//g' \
+        -e '/\{\.wikilink\}/d' \
+        -e '/\[Site$/d' \
+        -e '/^:::/d' \
+        -e '/^Page last modified on /d' \
+        -e '/^\[\]/d' \
+        -e 's/\{rel=".*\}//g' \
+        -e 's/\{rel="nofollow"$//g' \
+        -e 's/^rel="nofollow"\}//g' \
+        -e 's/^target="_blank"\}//g' \
+        -e 's/\{\.createlinktext.*\}//g' \
+        -e 's/\{\.createlinktext$//g' \
+        -e 's/\{\.createlink.*\}//g' \
+        -e 's/\{\.createlink$//g' \
+        -e 's/\{\.urllink.*\}//g' \
+        -e 's/\{\.urllink$//g' \
+        -- "${out}.md"
+
+    # NOTE(review): the wording suggests "Original page" should link to
+    # ${URL}/?n=${base}.${file}, but no link markup is visible - please confirm.
+    echo "Original page where you can edit it.
+" >> "${out}.md"
+done
+
+notYetAnotherWiki.lua
-- 
cgit v1.1