| author    | dvs1                                               | 2025-01-20 14:50:57 +1000 |
|-----------|----------------------------------------------------|---------------------------|
| committer | dvs1                                               | 2025-01-20 14:50:57 +1000 |
| commit    | d1e2966ab8d3fd38edfa3e50fae8e26601ef1902           |                           |
| tree      | 08c152130ff063ce89dc5f55b4bdc0213eeb3b7b /SuckItPm |                           |
| parent    | Document cmark-gfm dependency.                     |                           |
Clean up the cleaning up
Diffstat (limited to 'SuckItPm')
| mode       | file     | lines changed |
|------------|----------|---------------|
| -rwxr-xr-x | SuckItPm | 30            |

1 file changed, 16 insertions(+), 14 deletions(-)
```diff
--- a/SuckItPm
+++ b/SuckItPm
@@ -10,20 +10,20 @@ filter="
 -not -name ".pageindex" -a \
 "
 
-pushd /opt/merged
+pushd /opt/mergedWork
 
 find /opt/pmwiki/wiki.d ${filter} \
 -name "*.*" -type f,l -printf "%P\n" | while read line
 do
 base=`echo "${line}" | cut -d '.' -f 1`
 file=`echo "${line}" | cut -d '.' -f 2`
+# page="?n=${line}"
 mkdir -p PmWiki/$base
 mkdir -p combined/$base
 echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md"
 # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md
 # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM
 # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time.
-# curl --no-progress-meter ${URL}/?n=${base}.${file} -o PmWiki/${base}/${file}.HTM
 # curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD
 curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM
 cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL
@@ -44,7 +44,6 @@ do
 -e "s/class='vspace'//g" \
 -e "s/class='wikilink'//g" \
 -e "s/style='.*;'//g"
-# -e "s/class='.*'//g" \
 # -e "s/style='background-color: #.*;'//g" \
 # -e "s/style='font-size: .*;'//g"
 
@@ -57,23 +56,26 @@ do
 -e 's/\$/\$dlr\$/g' \
 -e 's/\{#.*\}//g' \
 -e '/^:::/d' \
--e '/\[Site$/d' \
--e '/^Page last modified on /d' \
--e '/^\[\]/d' \
--e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \
--e "s/^\`<img /<img /g" \
--e "s/^\`\`\`\{=html\}//g" \
--e "s/^\`\`\`//g" \
--e "s/\`\{=html\}//g"
+# -e '/\[Site$/d' \
+# -e '/^Page last modified on /d' \
+# -e '/^\[\]/d' \
+# -e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \
+# -e "s/^\`<img /<img /g" \
+# -e "s/^\`\`\`\{=html\}//g" \
+# -e "s/^\`\`\`//g" \
+# -e "s/\`\{=html\}//g"
 
-echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> PmWiki/${base}/${file}.md
+# Don't need this, the parts we are grabbing already include that link at the bottom.
+# echo -e "****\n[Original page](${URL}/${base}/${page}) where maybe you can edit it." >> PmWiki/${base}/${file}.md
 
 # pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm
 # cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body
 # ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body
 ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md
-done
 
-notYetAnotherWiki.lua
+if [ -f xx01 ]; then
+rm xx01
+fi
+done
 
 popd
```
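The TODO in the first hunk wants to skip pages that have not changed, and notes that ordinary HTTP caching will not help because PmWiki renders every page dynamically. One client-side workaround is to download as usual but only overwrite the stored copy (and hence only re-run the cleanup and conversion steps) when the bytes actually differ. A minimal sketch, assuming the script's existing `${URL}`, `${base}`, and `${file}` variables:

```sh
# Fetch to a temp file, then keep it only if it differs from the last run's copy.
tmp=$(mktemp)
curl --no-progress-meter "${URL}/?n=${base}.${file}?action=print" -o "${tmp}"
if cmp -s "${tmp}" "PmWiki/${base}/${file}.HTM"; then
    rm "${tmp}"                               # byte-identical: skip the sed/convert steps
else
    mv "${tmp}" "PmWiki/${base}/${file}.HTM"  # new or changed page: process as before
fi
```

This only pays off if the print view is byte-stable between renders; if PmWiki stamps each response, comparing after the sed cleanup pass would be a better choke point.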

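The new `if [ -f xx01 ]` guard at the bottom of the loop removes a stray `xx01` file (the name matches `csplit`'s default output prefix, so presumably a leftover from a split step elsewhere in the script). If the test exists only to keep `rm` from complaining, the same effect fits on one line:

```sh
rm -f xx01    # -f ignores a missing file, so no existence test is needed
```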