#!/bin/bash URL="https://wiki.devuan.org" filter=" -not -name "*~" -a \ -not -name ".flock" -a \ -not -name ".htaccess" -a \ -not -name ".lastmod" -a \ -not -name ".pageindex" -a \ " pushd /opt/mergedWork find /opt/pmwiki/wiki.d ${filter} \ -name "*.*" -type f,l -printf "%P\n" | while read line do base=`echo "${line}" | cut -d '.' -f 1` file=`echo "${line}" | cut -d '.' -f 2` time=`date --rfc-3339=seconds -ur /opt/pmwiki/wiki.d/${base}.${file} | cut -d '+' -f 1` mkdir -p PmWiki/$base mkdir -p combined/$base echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" echo -e "ogWiki=PmWiki\nogURL=${URL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > PmWiki/${base}/${file}.md.md # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. # curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL csplit -ks PmWiki/${base}/${file}.HTM '%%' '//' if [ -f xx00 ]; then rm PmWiki/${base}/${file}.HTM mv xx00 PmWiki/${base}/${file}.HTM fi if [ -f xx01 ]; then rm xx01 fi sed -i -E PmWiki/${base}/${file}.HTM \ -e "s/rel='nofollow'//g" \ -e "s/target='_blank'//g" \ -e "s/class='categorylink'//g" \ -e "s/class='createlink'//g" \ -e "s/class='createlinktext'//g" \ -e "s/class='escaped'//g" \ -e "s/class='diffmarkup'//g" \ -e "s/class='selflink'//g" \ -e "s/class='urllink'//g" \ -e "s/class='vspace'//g" \ -e "s/class='wikilink'//g" \ -e "s/style='.*;'//g" # pandoc -f markdown -t commonmark_x --self-contained PmWiki//${base}/${file}.MD >PmWiki/${base}/${file}.md pandoc -f html -t commonmark_x --self-contained PmWiki//${base}/${file}.HTM >PmWiki/${base}/${file}.md cp PmWiki/${base}/${file}.md PmWiki/${base}/${file}.md_ORIGINAL # Attempt to clean things up, badly. 
#
# Orientation for the statements packed into the single physical line above:
#   - After `pushd /opt/mergedWork`, each name printed by
#     `find /opt/pmwiki/wiki.d ... -printf "%P\n"` is split on '.': field 1
#     becomes ${base} and field 2 becomes ${file}.
#   - ${time} is taken from `date --rfc-3339=seconds -ur` on
#     /opt/pmwiki/wiki.d/${base}.${file}, cut at the first '+'.
#   - A key=value header (ogWiki, ogURL, ogBase, ogFile, timestamp) is written
#     to PmWiki/${base}/${file}.md.md.
#   - curl saves ${URL}/?n=${base}.${file}?action=print as ${file}.HTM, and a
#     copy is kept as ${file}.HTM_ORIGINAL.
#   - csplit runs on the .HTM; if it leaves an xx00 piece, that piece replaces
#     the .HTM, and any xx01 piece is removed.
#   - sed -i deletes the listed rel=/target=/class=/style= attribute strings
#     from the .HTM.
#   - pandoc converts the .HTM from html to commonmark_x as ${file}.md, and a
#     copy is kept as ${file}.md_ORIGINAL.
# NOTE(review): the line above begins with `#!` and contains no newline, so as
#   written the shell treats all of it as a comment. Presumably the original
#   line breaks were lost -- TODO restore them before relying on this script.
# NOTE(review): the csplit patterns '%%' and '//' are empty regular
#   expressions. Presumably the intended delimiters went missing -- TODO
#   confirm what the page body is supposed to be cut on.
# NOTE(review): the inner double quotes in filter="..." end the outer string,
#   which leaves *~ unquoted, and ${filter} is then expanded unquoted on the
#   find command line. The pattern is therefore open to pathname expansion in
#   the current directory -- consider building the predicate list as an array.
# The sed command that follows edits PmWiki/${base}/${file}.md in place.
sed -i -E PmWiki/${base}/${file}.md \ -e 's/\$/\$dlr\$/g' \ -e 's/\{#.*\}//g' \ -e '/^:::/d' \ # -e '/\[Site$/d' \ # -e '/^\[\]/d' \ # -e "s/\`\`\{=html\}\`<\/a>\`\{=html\}//g" \ # -e "s/^\`