diff options
author | dvs1 | 2025-01-20 14:50:57 +1000 |
---|---|---|
committer | dvs1 | 2025-01-20 14:50:57 +1000 |
commit | d1e2966ab8d3fd38edfa3e50fae8e26601ef1902 (patch) | |
tree | 08c152130ff063ce89dc5f55b4bdc0213eeb3b7b | |
parent | Document cmark-gfm dependency. (diff) | |
download | notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.zip notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.gz notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.bz2 notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.xz |
Clean up the cleaning up
-rwxr-xr-x | SuckItFos | 16 | ||||
-rwxr-xr-x | SuckItPm | 30 | ||||
-rw-r--r-- | TODO.md | 10 |
3 files changed, 30 insertions, 26 deletions
@@ -10,7 +10,7 @@ filter=" | |||
10 | -name TWiki -prune -o \ | 10 | -name TWiki -prune -o \ |
11 | " | 11 | " |
12 | 12 | ||
13 | pushd /opt/merged | 13 | pushd /opt/mergedWork |
14 | 14 | ||
15 | find /opt/Foswiki/data ${filter} \ | 15 | find /opt/Foswiki/data ${filter} \ |
16 | -name "*.txt" -type f,l -printf "%P\n" | while read line | 16 | -name "*.txt" -type f,l -printf "%P\n" | while read line |
@@ -26,7 +26,6 @@ do | |||
26 | # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM | 26 | # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM |
27 | # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. | 27 | # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. |
28 | curl --silent --no-progress-meter ${URL}/${base}/${file}?cover=print -o Foswiki/${base}/${file}.HTM | 28 | curl --silent --no-progress-meter ${URL}/${base}/${file}?cover=print -o Foswiki/${base}/${file}.HTM |
29 | |||
30 | cp Foswiki/${base}/${file}.HTM Foswiki/${base}/${file}.HTM_ORIGINAL | 29 | cp Foswiki/${base}/${file}.HTM Foswiki/${base}/${file}.HTM_ORIGINAL |
31 | csplit -ks Foswiki/${base}/${file}.HTM '%<div id="patternMainContents">%' '/<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">/' | 30 | csplit -ks Foswiki/${base}/${file}.HTM '%<div id="patternMainContents">%' '/<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">/' |
32 | if [ -f xx00 ]; then | 31 | if [ -f xx00 ]; then |
@@ -58,18 +57,21 @@ do | |||
58 | sed -i -E Foswiki/${base}/${file}.md \ | 57 | sed -i -E Foswiki/${base}/${file}.md \ |
59 | -e 's/\$/\$dlr\$/g' \ | 58 | -e 's/\$/\$dlr\$/g' \ |
60 | -e 's/\{#.*\}//g' \ | 59 | -e 's/\{#.*\}//g' \ |
61 | -e 's/\{\.pattern.*\}//g' \ | 60 | -e '/^:::/d' \ |
62 | -e 's/\{\.pattern.*//g' \ | ||
63 | -e '/^<!-- -->/d' \ | 61 | -e '/^<!-- -->/d' \ |
64 | -e '/^:::/d' | 62 | # -e 's/\{\.pattern.*\}//g' \ |
63 | # -e 's/\{\.pattern.*//g' \ | ||
64 | |||
65 | echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> Foswiki/${base}/${file}.md | 65 | echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> Foswiki/${base}/${file}.md |
66 | 66 | ||
67 | # pandoc -t html -f commonmark_x --self-contained Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.htm | 67 | # pandoc -t html -f commonmark_x --self-contained Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.htm |
68 | # cmark-gfm -t html -e footnotes -e table -e strikethrough Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.body | 68 | # cmark-gfm -t html -e footnotes -e table -e strikethrough Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.body |
69 | # ln -frs Foswiki/${base}/${file}.body combined/${base}/${file}.body | 69 | # ln -frs Foswiki/${base}/${file}.body combined/${base}/${file}.body |
70 | ln -frs Foswiki/${base}/${file}.md combined/${base}/${file}.md | 70 | ln -frs Foswiki/${base}/${file}.md combined/${base}/${file}.md |
71 | done | ||
72 | 71 | ||
73 | notYetAnotherWiki.lua | 72 | if [ -f xx01 ]; then |
73 | rm xx01 | ||
74 | fi | ||
75 | done | ||
74 | 76 | ||
75 | popd | 77 | popd |
@@ -10,20 +10,20 @@ filter=" | |||
10 | -not -name ".pageindex" -a \ | 10 | -not -name ".pageindex" -a \ |
11 | " | 11 | " |
12 | 12 | ||
13 | pushd /opt/merged | 13 | pushd /opt/mergedWork |
14 | 14 | ||
15 | find /opt/pmwiki/wiki.d ${filter} \ | 15 | find /opt/pmwiki/wiki.d ${filter} \ |
16 | -name "*.*" -type f,l -printf "%P\n" | while read line | 16 | -name "*.*" -type f,l -printf "%P\n" | while read line |
17 | do | 17 | do |
18 | base=`echo "${line}" | cut -d '.' -f 1` | 18 | base=`echo "${line}" | cut -d '.' -f 1` |
19 | file=`echo "${line}" | cut -d '.' -f 2` | 19 | file=`echo "${line}" | cut -d '.' -f 2` |
20 | # page="?n=${line}" | ||
20 | mkdir -p PmWiki/$base | 21 | mkdir -p PmWiki/$base |
21 | mkdir -p combined/$base | 22 | mkdir -p combined/$base |
22 | echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" | 23 | echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" |
23 | # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md | 24 | # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md |
24 | # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM | 25 | # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM |
25 | # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. | 26 | # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. |
26 | # curl --no-progress-meter ${URL}/?n=${base}.${file} -o PmWiki/${base}/${file}.HTM | ||
27 | # curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD | 27 | # curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD |
28 | curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM | 28 | curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM |
29 | cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL | 29 | cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL |
@@ -44,7 +44,6 @@ do | |||
44 | -e "s/class='vspace'//g" \ | 44 | -e "s/class='vspace'//g" \ |
45 | -e "s/class='wikilink'//g" \ | 45 | -e "s/class='wikilink'//g" \ |
46 | -e "s/style='.*;'//g" | 46 | -e "s/style='.*;'//g" |
47 | # -e "s/class='.*'//g" \ | ||
48 | # -e "s/style='background-color: #.*;'//g" \ | 47 | # -e "s/style='background-color: #.*;'//g" \ |
49 | # -e "s/style='font-size: .*;'//g" | 48 | # -e "s/style='font-size: .*;'//g" |
50 | 49 | ||
@@ -57,23 +56,26 @@ do | |||
57 | -e 's/\$/\$dlr\$/g' \ | 56 | -e 's/\$/\$dlr\$/g' \ |
58 | -e 's/\{#.*\}//g' \ | 57 | -e 's/\{#.*\}//g' \ |
59 | -e '/^:::/d' \ | 58 | -e '/^:::/d' \ |
60 | -e '/\[Site$/d' \ | 59 | # -e '/\[Site$/d' \ |
61 | -e '/^Page last modified on /d' \ | 60 | # -e '/^Page last modified on /d' \ |
62 | -e '/^\[\]/d' \ | 61 | # -e '/^\[\]/d' \ |
63 | -e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \ | 62 | # -e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \ |
64 | -e "s/^\`<img /<img /g" \ | 63 | # -e "s/^\`<img /<img /g" \ |
65 | -e "s/^\`\`\`\{=html\}//g" \ | 64 | # -e "s/^\`\`\`\{=html\}//g" \ |
66 | -e "s/^\`\`\`//g" \ | 65 | # -e "s/^\`\`\`//g" \ |
67 | -e "s/\`\{=html\}//g" | 66 | # -e "s/\`\{=html\}//g" |
68 | 67 | ||
69 | echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> PmWiki/${base}/${file}.md | 68 | # Don't need this, the parts we are grabbing already include that link at the bottom. |
69 | # echo -e "****\n[Original page](${URL}/${base}/${page}) where maybe you can edit it." >> PmWiki/${base}/${file}.md | ||
70 | 70 | ||
71 | # pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm | 71 | # pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm |
72 | # cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body | 72 | # cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body |
73 | # ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body | 73 | # ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body |
74 | ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md | 74 | ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md |
75 | done | ||
76 | 75 | ||
77 | notYetAnotherWiki.lua | 76 | if [ -f xx01 ]; then |
77 | rm xx01 | ||
78 | fi | ||
79 | done | ||
78 | 80 | ||
79 | popd | 81 | popd |
@@ -3,21 +3,21 @@ | |||
3 | ## Do these | 3 | ## Do these |
4 | 4 | ||
5 | Bugs - | 5 | Bugs - |
6 | - PmWiki in its current config needs that ?n=foo.bar nonsense for the Original page link. | ||
7 | - https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebPreferences.HTML "Main" trail not getting whichPage() | 6 | - https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebPreferences.HTML "Main" trail not getting whichPage() |
8 | - https://nyaw.wiki.devuan.org/PmWiki/Site/Site.HTML missing everything after the "?action=attr." bit. | ||
9 | - https://nyaw.wiki.devuan.org/PmWiki/Site/EditForm.HTML | ||
10 | - https://nyaw.wiki.devuan.org/PmWiki/Profiles/Debdog.HTML pandoc can't handle the background table cell colours in the "Background colours" table, which is kinda the point of it. | ||
11 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebHome.HTML has that twisty thing which looks not simple to remove. | 7 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebHome.HTML has that twisty thing which looks not simple to remove. |
12 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebChanges.HTML | 8 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebChanges.HTML |
13 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebLeftBarExample.HTML | 9 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebLeftBarExample.HTML |
14 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebPreferences.HTML has that twisty thing which looks not simple to remove. | 10 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebPreferences.HTML has that twisty thing which looks not simple to remove. |
15 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebSearch.HTML has that twisty thing which looks not simple to remove. | 11 | - https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebSearch.HTML has that twisty thing which looks not simple to remove. |
16 | - https://nyaw.wiki.devuan.org/Foswiki/Main/AdminGroup.HTML | 12 | - https://nyaw.wiki.devuan.org/Foswiki/Main/AdminGroup.HTML |
17 | - https://nyaw.wiki.devuan.org/Foswiki/Main/DevuanCluster.HTML A very lengthy and complex document, I'll likely miss something, but chip away at the obvious. | ||
18 | - https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebLeftBar.HTML | 13 | - https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebLeftBar.HTML |
19 | - https://nyaw.wiki.devuan.org/Foswiki/Main/WikiGroups.HTML has that twisty thing which looks not simple to remove. | 14 | - https://nyaw.wiki.devuan.org/Foswiki/Main/WikiGroups.HTML has that twisty thing which looks not simple to remove. |
15 | - https://nyaw.wiki.devuan.org/PmWiki/Site/Site.HTML missing everything after the "?action=attr." bit. | ||
16 | - https://nyaw.wiki.devuan.org/PmWiki/Site/EditForm.HTML | ||
17 | - https://nyaw.wiki.devuan.org/users/dunno/DevuanCluster.HTML A very lengthy and complex document, I'll likely miss something, but chip away at the obvious. | ||
18 | - https://nyaw.wiki.devuan.org/users/Debdog.HTML pandoc can't handle the background table cell colours in the "Background colours" table, which is kinda the point of it. | ||
20 | - {.underline} is the result of <strong>foo</strong> getting lost in translation. | 19 | - {.underline} is the result of <strong>foo</strong> getting lost in translation. |
20 | - PmWiki in its current config needs that ?n=foo.bar nonsense for the Original page link. Which I'm currently neatly sidestepping, as the scraped page has a similar thing. | ||
21 | 21 | ||
22 | Check the timestamps on the files, only update if source is newer than destination. Meh, it's already 600 times faster than the pandoc version. | 22 | Check the timestamps on the files, only update if source is newer than destination. Meh, it's already 600 times faster than the pandoc version. |
23 | - One quirk to watch for is if a URL path changes, the docs that have that URL need to be redone. | 23 | - One quirk to watch for is if a URL path changes, the docs that have that URL need to be redone. |