about summary refs log tree commit diff stats homepage
diff options
context:
space:
mode:
author dvs1 2025-01-20 14:50:57 +1000
committer dvs1 2025-01-20 14:50:57 +1000
commit d1e2966ab8d3fd38edfa3e50fae8e26601ef1902 (patch)
tree 08c152130ff063ce89dc5f55b4bdc0213eeb3b7b
parent Document cmark-gfm dependency. (diff)
download notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.zip
notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.gz
notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.bz2
notYetAnotherWiki-d1e2966ab8d3fd38edfa3e50fae8e26601ef1902.tar.xz
Clean up the cleaning up
-rwxr-xr-x SuckItFos 16
-rwxr-xr-x SuckItPm 30
-rw-r--r-- TODO.md 10
3 files changed, 30 insertions, 26 deletions
diff --git a/SuckItFos b/SuckItFos
index b59f6b0..11adaf9 100755
--- a/SuckItFos
+++ b/SuckItFos
@@ -10,7 +10,7 @@ filter="
10 -name TWiki -prune -o \ 10 -name TWiki -prune -o \
11" 11"
12 12
13pushd /opt/merged 13pushd /opt/mergedWork
14 14
15find /opt/Foswiki/data ${filter} \ 15find /opt/Foswiki/data ${filter} \
16-name "*.txt" -type f,l -printf "%P\n" | while read line 16-name "*.txt" -type f,l -printf "%P\n" | while read line
@@ -26,7 +26,6 @@ do
26 # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM 26 # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM
27 # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. 27 # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time.
28 curl --silent --no-progress-meter ${URL}/${base}/${file}?cover=print -o Foswiki/${base}/${file}.HTM 28 curl --silent --no-progress-meter ${URL}/${base}/${file}?cover=print -o Foswiki/${base}/${file}.HTM
29
30 cp Foswiki/${base}/${file}.HTM Foswiki/${base}/${file}.HTM_ORIGINAL 29 cp Foswiki/${base}/${file}.HTM Foswiki/${base}/${file}.HTM_ORIGINAL
31 csplit -ks Foswiki/${base}/${file}.HTM '%<div id="patternMainContents">%' '/<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">/' 30 csplit -ks Foswiki/${base}/${file}.HTM '%<div id="patternMainContents">%' '/<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">/'
32 if [ -f xx00 ]; then 31 if [ -f xx00 ]; then
@@ -58,18 +57,21 @@ do
58 sed -i -E Foswiki/${base}/${file}.md \ 57 sed -i -E Foswiki/${base}/${file}.md \
59 -e 's/\$/\$dlr\$/g' \ 58 -e 's/\$/\$dlr\$/g' \
60 -e 's/\{#.*\}//g' \ 59 -e 's/\{#.*\}//g' \
61 -e 's/\{\.pattern.*\}//g' \ 60 -e '/^:::/d' \
62 -e 's/\{\.pattern.*//g' \
63 -e '/^<!-- -->/d' \ 61 -e '/^<!-- -->/d' \
64 -e '/^:::/d' 62# -e 's/\{\.pattern.*\}//g' \
63# -e 's/\{\.pattern.*//g' \
64
65 echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> Foswiki/${base}/${file}.md 65 echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> Foswiki/${base}/${file}.md
66 66
67# pandoc -t html -f commonmark_x --self-contained Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.htm 67# pandoc -t html -f commonmark_x --self-contained Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.htm
68# cmark-gfm -t html -e footnotes -e table -e strikethrough Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.body 68# cmark-gfm -t html -e footnotes -e table -e strikethrough Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.body
69# ln -frs Foswiki/${base}/${file}.body combined/${base}/${file}.body 69# ln -frs Foswiki/${base}/${file}.body combined/${base}/${file}.body
70 ln -frs Foswiki/${base}/${file}.md combined/${base}/${file}.md 70 ln -frs Foswiki/${base}/${file}.md combined/${base}/${file}.md
71done
72 71
73notYetAnotherWiki.lua 72 if [ -f xx01 ]; then
73 rm xx01
74 fi
75done
74 76
75popd 77popd
diff --git a/SuckItPm b/SuckItPm
index 156ee9f..a63eb08 100755
--- a/SuckItPm
+++ b/SuckItPm
@@ -10,20 +10,20 @@ filter="
10 -not -name ".pageindex" -a \ 10 -not -name ".pageindex" -a \
11" 11"
12 12
13pushd /opt/merged 13pushd /opt/mergedWork
14 14
15find /opt/pmwiki/wiki.d ${filter} \ 15find /opt/pmwiki/wiki.d ${filter} \
16-name "*.*" -type f,l -printf "%P\n" | while read line 16-name "*.*" -type f,l -printf "%P\n" | while read line
17do 17do
18 base=`echo "${line}" | cut -d '.' -f 1` 18 base=`echo "${line}" | cut -d '.' -f 1`
19 file=`echo "${line}" | cut -d '.' -f 2` 19 file=`echo "${line}" | cut -d '.' -f 2`
20# page="?n=${line}"
20 mkdir -p PmWiki/$base 21 mkdir -p PmWiki/$base
21 mkdir -p combined/$base 22 mkdir -p combined/$base
22 echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md" 23 echo "Converting ${URL}/?n=${base}.${file}?action=print -> PmWiki/${base}/${file}.md"
23# pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md 24# pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md
24 # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM 25 # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages. curl to .HTM
25 # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. 26 # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time.
26# curl --no-progress-meter ${URL}/?n=${base}.${file} -o PmWiki/${base}/${file}.HTM
27# curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD 27# curl --no-progress-meter ${URL}/?n=${base}.${file}?action=markdown -o PmWiki/${base}/${file}.MD
28 curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM 28 curl --no-progress-meter ${URL}/?n=${base}.${file}?action=print -o PmWiki/${base}/${file}.HTM
29 cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL 29 cp PmWiki/${base}/${file}.HTM PmWiki/${base}/${file}.HTM_ORIGINAL
@@ -44,7 +44,6 @@ do
44 -e "s/class='vspace'//g" \ 44 -e "s/class='vspace'//g" \
45 -e "s/class='wikilink'//g" \ 45 -e "s/class='wikilink'//g" \
46 -e "s/style='.*;'//g" 46 -e "s/style='.*;'//g"
47# -e "s/class='.*'//g" \
48# -e "s/style='background-color: #.*;'//g" \ 47# -e "s/style='background-color: #.*;'//g" \
49# -e "s/style='font-size: .*;'//g" 48# -e "s/style='font-size: .*;'//g"
50 49
@@ -57,23 +56,26 @@ do
57 -e 's/\$/\$dlr\$/g' \ 56 -e 's/\$/\$dlr\$/g' \
58 -e 's/\{#.*\}//g' \ 57 -e 's/\{#.*\}//g' \
59 -e '/^:::/d' \ 58 -e '/^:::/d' \
60 -e '/\[Site$/d' \ 59# -e '/\[Site$/d' \
61 -e '/^Page last modified on /d' \ 60# -e '/^Page last modified on /d' \
62 -e '/^\[\]/d' \ 61# -e '/^\[\]/d' \
63 -e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \ 62# -e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \
64 -e "s/^\`<img /<img /g" \ 63# -e "s/^\`<img /<img /g" \
65 -e "s/^\`\`\`\{=html\}//g" \ 64# -e "s/^\`\`\`\{=html\}//g" \
66 -e "s/^\`\`\`//g" \ 65# -e "s/^\`\`\`//g" \
67 -e "s/\`\{=html\}//g" 66# -e "s/\`\{=html\}//g"
68 67
69 echo -e "****\n[Original page](${URL}/${base}/${file}) where maybe you can edit it." >> PmWiki/${base}/${file}.md 68 # Don't need this, the parts we are grabbing already include that link at the bottom.
69# echo -e "****\n[Original page](${URL}/${base}/${page}) where maybe you can edit it." >> PmWiki/${base}/${file}.md
70 70
71# pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm 71# pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.htm
72# cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body 72# cmark-gfm -t html -e footnotes -e table -e strikethrough PmWiki/${base}/${file}.md > PmWiki/${base}/${file}.body
73# ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body 73# ln -frs PmWiki/${base}/${file}.body combined/${base}/${file}.body
74 ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md 74 ln -frs PmWiki/${base}/${file}.md combined/${base}/${file}.md
75done
76 75
77notYetAnotherWiki.lua 76 if [ -f xx01 ]; then
77 rm xx01
78 fi
79done
78 80
79popd 81popd
diff --git a/TODO.md b/TODO.md
index 0a748d2..4157083 100644
--- a/TODO.md
+++ b/TODO.md
@@ -3,21 +3,21 @@
3## Do these 3## Do these
4 4
5Bugs - 5Bugs -
6- PmWiki in its current config needs that ?n=foo.bar nonsense for the Original page link.
7- https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebPreferences.HTML &nbsp; "Main" trail not getting whichPage() 6- https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebPreferences.HTML &nbsp; "Main" trail not getting whichPage()
8- https://nyaw.wiki.devuan.org/PmWiki/Site/Site.HTML &nbsp; missing everything after the "?action=attr." bit.
9- https://nyaw.wiki.devuan.org/PmWiki/Site/EditForm.HTML
10- https://nyaw.wiki.devuan.org/PmWiki/Profiles/Debdog.HTML &nbsp; pandoc can't handle the background table cell colours in the "Background colours" table, which is kinda the point of it.
11- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebHome.HTML &nbsp; has that twisty thing which looks not simple to remove. 7- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebHome.HTML &nbsp; has that twisty thing which looks not simple to remove.
12- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebChanges.HTML 8- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebChanges.HTML
13- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebLeftBarExample.HTML 9- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebLeftBarExample.HTML
14- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebPreferences.HTML &nbsp; has that twisty thing which looks not simple to remove. 10- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebPreferences.HTML &nbsp; has that twisty thing which looks not simple to remove.
15- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebSearch.HTML &nbsp; has that twisty thing which looks not simple to remove. 11- https://nyaw.wiki.devuan.org/Foswiki/Sandbox/WebSearch.HTML &nbsp; has that twisty thing which looks not simple to remove.
16- https://nyaw.wiki.devuan.org/Foswiki/Main/AdminGroup.HTML 12- https://nyaw.wiki.devuan.org/Foswiki/Main/AdminGroup.HTML
17- https://nyaw.wiki.devuan.org/Foswiki/Main/DevuanCluster.HTML &nbsp; A very lengthy and complex document, I'll likely miss something, but chip away at the obvious.
18- https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebLeftBar.HTML 13- https://nyaw.wiki.devuan.org/Foswiki/Main/JensKorte/WebLeftBar.HTML
19- https://nyaw.wiki.devuan.org/Foswiki/Main/WikiGroups.HTML &nbsp; has that twisty thing which looks not simple to remove. 14- https://nyaw.wiki.devuan.org/Foswiki/Main/WikiGroups.HTML &nbsp; has that twisty thing which looks not simple to remove.
15- https://nyaw.wiki.devuan.org/PmWiki/Site/Site.HTML &nbsp; missing everything after the "?action=attr." bit.
16- https://nyaw.wiki.devuan.org/PmWiki/Site/EditForm.HTML
17- https://nyaw.wiki.devuan.org/users/dunno/DevuanCluster.HTML &nbsp; A very lengthy and complex document, I'll likely miss something, but chip away at the obvious.
18- https://nyaw.wiki.devuan.org/users/Debdog.HTML &nbsp; pandoc can't handle the background table cell colours in the "Background colours" table, which is kinda the point of it.
20- {.underline} is the result of <strong>foo</strong> getting lost in translation. 19- {.underline} is the result of <strong>foo</strong> getting lost in translation.
20- PmWiki in its current config needs that ?n=foo.bar nonsense for the Original page link. Which I'm currently neatly sidestepping, the scraped page has a similar thing.
21 21
22Check the timestamps on the files, only update if source is newer than destination. Meh, it's already 600 times faster than the pandoc version. 22Check the timestamps on the files, only update if source is newer than destination. Meh, it's already 600 times faster than the pandoc version.
23- One quirk to watch for is if a URL path changes, the docs that have that URL need to be redone. 23- One quirk to watch for is if a URL path changes, the docs that have that URL need to be redone.