From d1e2966ab8d3fd38edfa3e50fae8e26601ef1902 Mon Sep 17 00:00:00 2001 From: dvs1 Date: Mon, 20 Jan 2025 14:50:57 +1000 Subject: Clean up the cleaning up --- SuckItPm | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'SuckItPm') diff --git a/SuckItPm b/SuckItPm index 156ee9f..a63eb08 100755 --- a/SuckItPm +++ b/SuckItPm @@ -10,20 +10,20 @@ filter=" -not -name ".pageindex" -a \ " -pushd /opt/merged +pushd /opt/mergedWork find /opt/pmwiki/wiki.d ${filter} \ -name "*.*" -type f,l -printf "%P\n" | while read line do base=`echo "${line}" | cut -d '.' -f 1` file=`echo "${line}" | cut -d '.' -f 2` +# page="?n=${line}" mkdir -p PmWiki/$base mkdir -p combined/$base echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. -# curl --no-progress-meter ${URL}/?n=${base}.${file} -o PmWiki/${base}/${file}.HTM # curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL @@ -44,7 +44,6 @@ do -e "s/class='vspace'//g" \ -e "s/class='wikilink'//g" \ -e "s/style='.*;'//g" -# -e "s/class='.*'//g" \ # -e "s/style='background-color: #.*;'//g" \ # -e "s/style='font-size: .*;'//g" @@ -57,23 +56,26 @@ do -e 's/\$/\$dlr\$/g' \ -e 's/\{#.*\}//g' \ -e '/^:::/d' \ - -e '/\[Site$/d' \ - -e '/^Page last modified on /d' \ - -e '/^\[\]/d' \ - -e "s/\`\`\{=html\}\`<\/a>\`\{=html\}//g" \ - -e "s/^\`\`\{=html\}\`<\/a>\`\{=html\}//g" \ +# -e "s/^\`> PmWiki/${base}/${file}.md + # Don't need this, the parts we are grabbing already include that link at the bottom. +# echo -e "****\n[Original page](${URL}/${base}/${page}) where maybe you can edit it." >> PmWiki/${base}/${file}.md # pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm # cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body # ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md -done -notYetAnotherWiki.lua + if [ -f xx01 ]; then + rm xx01 + fi +done popd -- cgit v1.1