diff options
| author | dvs1 | 2025-01-20 14:50:57 +1000 |
|---|---|---|
| committer | dvs1 | 2025-01-20 14:50:57 +1000 |
| commit | d1e2966ab8d3fd38edfa3e50fae8e26601ef1902 (patch) | |
| tree | 08c152130ff063ce89dc5f55b4bdc0213eeb3b7b | |
| parent | Document cmark-gfm dependency. (diff) | |
| download | notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.zip notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.gz notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.bz2 notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.xz | |
Clean up the cleaning up
| -rwxr-xr-x | SuckItFos | 16 | ||||
| -rwxr-xr-x | SuckItPm | 30 | ||||
| -rw-r--r-- | TODO.md | 10 |
3 files changed, 30 insertions, 26 deletions
| @@ -10,7 +10,7 @@ filter=" | |||
| 10 | -name TWiki -prune -o \ | 10 | -name TWiki -prune -o \ |
| 11 | " | 11 | " |
| 12 | 12 | ||
| 13 | pushd /opt/merged | 13 | pushd /opt/mergedWork |
| 14 | 14 | ||
| 15 | find /opt/Foswiki/data ${filter} \ | 15 | find /opt/Foswiki/data ${filter} \ |
| 16 | -name "*.txt" -type f,l -printf "%P\n" | while read line | 16 | -name "*.txt" -type f,l -printf "%P\n" | while read line |
| @@ -26,7 +26,6 @@ do | |||
| 26 | # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM | 26 | # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM |
| 27 | # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. | 27 | # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. |
| 28 | curl --silent --no-progress-meter ${URL}/${base}/${file}?cover=print -o Foswiki/${base}/${file}.HTM | 28 | curl --silent --no-progress-meter ${URL}/${base}/${file}?cover=print -o Foswiki/${base}/${file}.HTM |
| 29 | |||
| 30 | cp Foswiki/${base}/${file}.HTM Foswiki/${base}/${file}.HTM_ORIGINAL | 29 | cp Foswiki/${base}/${file}.HTM Foswiki/${base}/${file}.HTM_ORIGINAL |
| 31 | csplit -ks Foswiki/${base}/${file}.HTM '%<div id="patternMainContents">%' '/<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">/' | 30 | csplit -ks Foswiki/${base}/${file}.HTM '%<div id="patternMainContents">%' '/<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">/' |
| 32 | if [ -f xx00 ]; then | 31 | if [ -f xx00 ]; then |
| @@ -58,18 +57,21 @@ do | |||
| 58 | sed -i -E Foswiki/${base}/${file}.md \ | 57 | sed -i -E Foswiki/${base}/${file}.md \ |
| 59 | -e 's/\$/\$dlr\$/g' \ | 58 | -e 's/\$/\$dlr\$/g' \ |
| 60 | -e 's/\{#.*\}//g' \ | 59 | -e 's/\{#.*\}//g' \ |
| 61 | -e 's/\{\.pattern.*\}//g' \ | 60 | -e '/^:::/d' \ |
| 62 | -e 's/\{\.pattern.*//g' \ | ||
| 63 | -e '/^<!-- -->/d' \ | 61 | -e '/^<!-- -->/d' \ |
| 64 | -e '/^:::/d' | 62 | # -e 's/\{\.pattern.*\}//g' \ |
| 63 | # -e 's/\{\.pattern.*//g' \ | ||
| 64 | |||
| 65 | echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> Foswiki/${base}/${file}.md | 65 | echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> Foswiki/${base}/${file}.md |
| 66 | 66 | ||
| 67 | # pandoc -t html -f commonmark_x --self-contained Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.htm | 67 | # pandoc -t html -f commonmark_x --self-contained Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.htm |
| 68 | # cmark-gfm -t html -e footnotes -e table -e strikethrough Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.body | 68 | # cmark-gfm -t html -e footnotes -e table -e strikethrough Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.body |
| 69 | # ln -frs Foswiki/${base}/${file}.body combined/${base}/${file}.body | 69 | # ln -frs Foswiki/${base}/${file}.body combined/${base}/${file}.body |
| 70 | ln -frs Foswiki/${base}/${file}.md combined/${base}/${file}.md | 70 | ln -frs Foswiki/${base}/${file}.md combined/${base}/${file}.md |
| 71 | done | ||
| 72 | 71 | ||
| 73 | notYetAnotherWiki.lua | 72 | if [ -f xx01 ]; then |
| 73 | rm xx01 | ||
| 74 | fi | ||
| 75 | done | ||
| 74 | 76 | ||
| 75 | popd | 77 | popd |
| @@ -10,20 +10,20 @@ filter=" | |||
| 10 | -not -name ".pageindex" -a \ | 10 | -not -name ".pageindex" -a \ |
| 11 | " | 11 | " |
| 12 | 12 | ||
| 13 | pushd /opt/merged | 13 | pushd /opt/mergedWork |
| 14 | 14 | ||
| 15 | find /opt/pmwiki/wiki.d ${filter} \ | 15 | find /opt/pmwiki/wiki.d ${filter} \ |
| 16 | -name "*.*" -type f,l -printf "%P\n" | while read line | 16 | -name "*.*" -type f,l -printf "%P\n" | while read line |
| 17 | do | 17 | do |
| 18 | base=`echo "${line}" | cut -d '.' -f 1` | 18 | base=`echo "${line}" | cut -d '.' -f 1` |
| 19 | file=`echo "${line}" | cut -d '.' -f 2` | 19 | file=`echo "${line}" | cut -d '.' -f 2` |
| 20 | # page="?n=${line}" | ||
| 20 | mkdir -p PmWiki/$base | 21 | mkdir -p PmWiki/$base |
| 21 | mkdir -p combined/$base | 22 | mkdir -p combined/$base |
| 22 | echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" | 23 | echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" |
| 23 | # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md | 24 | # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md |
| 24 | # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM | 25 | # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM |
| 25 | # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. | 26 | # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. |
| 26 | # curl --no-progress-meter ${URL}/?n=${base}.${file} -o PmWiki/${base}/${file}.HTM | ||
| 27 | # curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD | 27 | # curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD |
| 28 | curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM | 28 | curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM |
| 29 | cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL | 29 | cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL |
| @@ -44,7 +44,6 @@ do | |||
| 44 | -e "s/class='vspace'//g" \ | 44 | -e "s/class='vspace'//g" \ |
| 45 | -e "s/class='wikilink'//g" \ | 45 | -e "s/class='wikilink'//g" \ |
| 46 | -e "s/style='.*;'//g" | 46 | -e "s/style='.*;'//g" |
| 47 | # -e "s/class='.*'//g" \ | ||
| 48 | # -e "s/style='background-color: #.*;'//g" \ | 47 | # -e "s/style='background-color: #.*;'//g" \ |
| 49 | # -e "s/style='font-size: .*;'//g" | 48 | # -e "s/style='font-size: .*;'//g" |
| 50 | 49 | ||
| @@ -57,23 +56,26 @@ do | |||
| 57 | -e 's/\$/\$dlr\$/g' \ | 56 | -e 's/\$/\$dlr\$/g' \ |
| 58 | -e 's/\{#.*\}//g' \ | 57 | -e 's/\{#.*\}//g' \ |
| 59 | -e '/^:::/d' \ | 58 | -e '/^:::/d' \ |
| 60 | -e '/\[Site$/d' \ | 59 | # -e '/\[Site$/d' \ |
| 61 | -e '/^Page last modified on /d' \ | 60 | # -e '/^Page last modified on /d' \ |
| 62 | -e '/^\[\]/d' \ | 61 | # -e '/^\[\]/d' \ |
| 63 | -e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \ | 62 | # -e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \ |
| 64 | -e "s/^\`<img /<img /g" \ | 63 | # -e "s/^\`<img /<img /g" \ |
| 65 | -e "s/^\`\`\`\{=html\}//g" \ | 64 | # -e "s/^\`\`\`\{=html\}//g" \ |
| 66 | -e "s/^\`\`\`//g" \ | 65 | # -e "s/^\`\`\`//g" \ |
| 67 | -e "s/\`\{=html\}//g" | 66 | # -e "s/\`\{=html\}//g" |
| 68 | 67 | ||
| 69 | echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> PmWiki/${base}/${file}.md | 68 | # Don't need this, the parts we are grabbing already include that link at the bottom. |
| 69 | # echo -e "****\n[Original page](${URL}/${base}/${page}) where maybe you can edit it." >> PmWiki/${base}/${file}.md | ||
| 70 | 70 | ||
| 71 | # pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm | 71 | # pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm |
| 72 | # cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body | 72 | # cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body |
| 73 | # ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body | 73 | # ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body |
| 74 | ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md | 74 | ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md |
| 75 | done | ||
| 76 | 75 | ||
| 77 | notYetAnotherWiki.lua | 76 | if [ -f xx01 ]; then |
| 77 | rm xx01 | ||
| 78 | fi | ||
| 79 | done | ||
| 78 | 80 | ||
| 79 | popd | 81 | popd |
| @@ -3,21 +3,21 @@ | |||
| 3 | ## Do these | 3 | ## Do these |
| 4 | 4 | ||
| 5 | Bugs - | 5 | Bugs - |
| 6 | - PmWiki in its current config needs that ?n=foo.bar nonsense for the Original page link. | ||
| 7 | - https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebPreferences.HTML "Main" trail not getting whichPage() | 6 | - https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebPreferences.HTML "Main" trail not getting whichPage() |
| 8 | - https://nyaw.wiki.devuan.org/PmWiki/Site/Site.HTML missing everything after the "?action=attr." bit. | ||
| 9 | - https://nyaw.wiki.devuan.org/PmWiki/Site/EditForm.HTML | ||
| 10 | - https://nyaw.wiki.devuan.org/PmWiki/Profiles/Debdog.HTML pandoc can't handle the background table cell colours in the "Background colours" table, which is kinda the point of it. | ||
| 11 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebHome.HTML has that twisty thing which looks not simple to remove. | 7 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebHome.HTML has that twisty thing which looks not simple to remove. |
| 12 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebChanges.HTML | 8 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebChanges.HTML |
| 13 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebLeftBarExample.HTML | 9 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebLeftBarExample.HTML |
| 14 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebPreferences.HTML has that twisty thing which looks not simple to remove. | 10 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebPreferences.HTML has that twisty thing which looks not simple to remove. |
| 15 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebSearch.HTML has that twisty thing which looks not simple to remove. | 11 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebSearch.HTML has that twisty thing which looks not simple to remove. |
| 16 | - https://nyaw.wiki.devuan.org/Foswiki/Main/AdminGroup.HTML | 12 | - https://nyaw.wiki.devuan.org/Foswiki/Main/AdminGroup.HTML |
| 17 | - https://nyaw.wiki.devuan.org/Foswiki/Main/DevuanCluster.HTML A very lengthy and complex document, I'll likely miss something, but chip away at the obvious. | ||
| 18 | - https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebLeftBar.HTML | 13 | - https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebLeftBar.HTML |
| 19 | - https://nyaw.wiki.devuan.org/Foswiki/Main/WikiGroups.HTML has that twisty thing which looks not simple to remove. | 14 | - https://nyaw.wiki.devuan.org/Foswiki/Main/WikiGroups.HTML has that twisty thing which looks not simple to remove. |
| 15 | - https://nyaw.wiki.devuan.org/PmWiki/Site/Site.HTML missing everything after the "?action=attr." bit. | ||
| 16 | - https://nyaw.wiki.devuan.org/PmWiki/Site/EditForm.HTML | ||
| 17 | - https://nyaw.wiki.devuan.org/users/dunno/DevuanCluster.HTML A very lengthy and complex document, I'll likely miss something, but chip away at the obvious. | ||
| 18 | - https://nyaw.wiki.devuan.org/users/Debdog.HTML pandoc can't handle the background table cell colours in the "Background colours" table, which is kinda the point of it. | ||
| 20 | - {.underline} is the result of <strong>foo</strong> getting lost in translation. | 19 | - {.underline} is the result of <strong>foo</strong> getting lost in translation. |
| 20 | - PmWiki in its current config needs that ?n=foo.bar nonsense for the Original page link. Which I'm currently neatly sidestepping, the scraped page has a similar thing. | ||
| 21 | 21 | ||
| 22 | Check the timestamps on the files, only update if source is newer than destination. Meh, it's already 600 times faster than the pandoc version. | 22 | Check the timestamps on the files, only update if source is newer than destination. Meh, it's already 600 times faster than the pandoc version. |
| 23 | - One quirk to watch for is if a URL path changes, the docs that have that URL need to be redone. | 23 | - One quirk to watch for is if a URL path changes, the docs that have that URL need to be redone. |
