#!/bin/bash
#
# Mirror the pages of a PmWiki site as Markdown.
#
# For every "Group.Page" file found under the wiki.d directory the script
#   1. downloads the page from ${URL} using PmWiki's ?action=markdown,
#   2. normalises it to commonmark_x with pandoc,
#   3. keeps an untouched copy as <page>.md_ORIGINAL,
#   4. trims it to the part between "trailstart" and "trailend",
#   5. strips leftover pandoc / PmWiki markup with sed,
#   6. appends a link back to the original page,
#   7. renders an HTML fragment (<page>.body) with cmark-gfm.
# Finally notYetAnotherWiki.lua is run over the result.
#
# Output goes to PmWiki/<Group>/ and combined/<Group>/ below ${OUT_ROOT}.
# Requires GNU find, sed and csplit, plus curl, pandoc and cmark-gfm.
#
# Errors on a single page are deliberately non-fatal (best effort), so the
# script does not use "set -e".

readonly URL="https://wiki.devuan.org"
readonly WIKI_D="/opt/pmwiki/wiki.d"
readonly OUT_ROOT="/opt/merged"

# PmWiki housekeeping files that are not pages.  Kept as an array so the
# patterns reach find verbatim; as a plain string the "*~" pattern could be
# expanded by the shell against files in the current directory.
readonly -a FIND_FILTER=(
  -not -name '*~' -a
  -not -name '.flock' -a
  -not -name '.htaccess' -a
  -not -name '.lastmod' -a
  -not -name '.pageindex' -a
)

# Scratch directory for csplit output; set in main, removed on exit.
split_dir=""

cleanup() {
  if [[ -n "${split_dir}" ]]; then
    rm -rf -- "${split_dir}"
  fi
}

#######################################
# Download one wiki page and convert it.
# Globals:   URL (read), split_dir (read)
# Arguments: $1 - file name relative to wiki.d, in "Group.Page" form
# Outputs:   progress line on stdout, warnings on stderr
# Returns:   0 on success, 1 if the page could not be downloaded
#######################################
convert_page() {
  local line=$1
  local base rest file page_dir md

  # Same result as `cut -d. -f1` / `cut -d. -f2`, without forking.
  base=${line%%.*}
  rest=${line#*.}
  file=${rest%%.*}

  page_dir="PmWiki/${base}"
  md="${page_dir}/${file}.md"

  mkdir -p -- "${page_dir}" "combined/${base}"
  printf 'Converting %s -> %s\n' "${URL}/?n=${base}.${file}" "${md}"

  # The site is dynamic, so nothing is cached and every run re-downloads.
  # Comment the curl call out while testing to reuse the previous .MD files.
  # The URL must be quoted: it contains "?", which is a glob character.
  if ! curl --no-progress-meter "${URL}/?n=${base}.${file}?action=markdown" \
    -o "${page_dir}/${file}.MD"; then
    printf 'warning: download of %s.%s failed, skipping\n' \
      "${base}" "${file}" >&2
    return 1
  fi

  pandoc -f markdown -t commonmark_x --self-contained \
    "${page_dir}/${file}.MD" >"${md}"
  ln -frs -- "${md}" "combined/${base}/${file}.md"
  cp -- "${md}" "${md}_ORIGINAL"

  # Keep only the text between "trailstart" and "trailend".  The pieces are
  # written to a scratch directory so no xxNN files are left in the output.
  csplit -ks -f "${split_dir}/xx" "${md}" '%trailstart%' '/trailend/'
  if [[ -f "${split_dir}/xx00" ]]; then
    mv -f -- "${split_dir}/xx00" "${md}"
  fi
  rm -f -- "${split_dir}"/xx*

  # Attempt to clean things up, badly.
  sed -i -E \
    -e 's/\$/\$dlr\$/g' \
    -e 's/\{#.*\}//g' \
    -e '/^:::/d' \
    -e '/\{\.wikilink\}/d' \
    -e '/\[Site$/d' \
    -e '/^Page last modified on /d' \
    -e '/^\[\]/d' \
    -e "s/\`\`\{=html\}\`<\/a>\`\{=html\}//g" \
    -- "${md}"
  # NOTE(review): the copy of this script under review was damaged at this
  # point.  One or more further sed expressions (the next one began with
  # "s/^" followed by a backtick) and the start of a command that appended an
  # "Original page where you can edit it." line to the page were unreadable,
  # and the {=html} expression above looks as if it lost an HTML tag.
  # Restore them from the upstream script before relying on this clean-up.

  # printf rather than `echo -e`, so backslashes in page names stay literal.
  printf '****\n[Original page](%s/%s/%s) where you can edit it.\n' \
    "${URL}" "${base}" "${file}" >>"${md}"

  cmark-gfm -t html -e footnotes -e table -e strikethrough \
    "${md}" >"${page_dir}/${file}.body"
}

main() {
  local line

  # Everything below writes relative paths; refuse to run anywhere else.
  pushd "${OUT_ROOT}" || {
    printf 'error: cannot enter %s\n' "${OUT_ROOT}" >&2
    exit 1
  }

  split_dir=$(mktemp -d) || {
    printf 'error: mktemp failed\n' >&2
    exit 1
  }
  trap cleanup EXIT

  # NUL-delimited names on fd 3: safe for odd file names, and the commands
  # inside the loop cannot swallow the list through stdin.
  while IFS= read -r -d '' -u 3 line; do
    convert_page "${line}"
  done 3< <(find "${WIKI_D}" "${FIND_FILTER[@]}" \
    -name '*.*' -type f,l -printf '%P\0')

  notYetAnotherWiki.lua

  popd
}

main "$@"