blob: 64591c3434da30321123193683731515f9299ecb (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
#!/bin/bash
#
# Mirror the PmWiki pages served at ${URL} into /opt/merged as Markdown.
#
# For every page file found under /opt/pmwiki/wiki.d (named "Group.Page"):
#   1. download the page through the wiki's "markdown" action with curl,
#   2. convert it to commonmark_x with pandoc,
#   3. keep only the part from the "trailstart" line up to the "trailend" line,
#   4. strip leftover pandoc / HTML markup with sed,
#   5. append a link back to the original wiki page,
#   6. render an HTML fragment (.body) with cmark-gfm.
# Afterwards notYetAnotherWiki.lua is run from /opt/merged.
#
# Needs GNU find (-printf, -type f,l), curl, pandoc, csplit, GNU sed (-i),
# cmark-gfm and notYetAnotherWiki.lua on PATH.
#
# Individual page failures are deliberately not fatal: the loop carries on
# with the next page.

URL="https://wiki.devuan.org"

# find(1) tests that exclude editor backups and PmWiki housekeeping files.
# Kept in an array so the patterns reach find verbatim; as a plain string the
# "*~" pattern was glob-expanded against the current directory.
# The trailing -a joins these tests to the -name test given on the find line.
filter=(
  -not -name '*~' -a
  -not -name '.flock' -a
  -not -name '.htaccess' -a
  -not -name '.lastmod' -a
  -not -name '.pageindex' -a
)

# Everything below uses relative paths, so refuse to run anywhere else.
pushd /opt/merged || exit 1

find /opt/pmwiki/wiki.d "${filter[@]}" \
  -name '*.*' -type f,l -printf '%P\n' |
  while IFS= read -r line; do
    # Page files are named "Group.Page": first dot-separated field is the
    # group, second is the page name (same result as cut -d. -f1 / -f2).
    base=${line%%.*}
    rest=${line#*.}
    file=${rest%%.*}

    raw="PmWiki/${base}/${file}.MD" # markdown exactly as downloaded
    page="PmWiki/${base}/${file}.md" # cleaned-up result

    mkdir -p -- "PmWiki/${base}"
    mkdir -p -- "combined/${base}"
    printf '%s\n' "Converting ${URL}/?n=${base}.${file} -> PmWiki/${base}/${file}.md"

    # Earlier approach, converting straight from the rendered HTML:
    # pandoc -f html -t markdown --self-contained ${URL}/?n=${base}.${file} >PmWiki/${base}/${file}.md
    # TODO - try curl, to see what is actually downloaded, and maybe not download unchanged pages.  curl to .HTM
    # That does not avoid re-downloads, because a dynamic site is not cached.
    # The curl command can at least be commented out during testing to save time.
    # curl --no-progress-meter ${URL}/?n=${base}.${file} -o PmWiki/${base}/${file}.HTM

    # The URL is quoted: it contains "?", which the shell would otherwise
    # treat as a glob character.
    # NOTE(review): the query string has a second "?" rather than "&" before
    # action=markdown - confirm the wiki accepts this form.
    curl --no-progress-meter "${URL}/?n=${base}.${file}?action=markdown" -o "${raw}"

    # pandoc -f html -t commonmark_x --self-contained PmWiki//${base}/${file}.HTM >PmWiki/${base}/${file}.md
    pandoc -f markdown -t commonmark_x --self-contained "${raw}" >"${page}"
    ln -frs "${page}" "combined/${base}/${file}.md"
    cp -- "${page}" "${page}_ORIGINAL"

    # Drop pieces left by a previous page or an interrupted run, so a stale
    # xx00 can never be mistaken for this page's content.
    rm -f -- xx00 xx01
    # Skip everything before the "trailstart" line, then split at "trailend":
    # xx00 receives the text in between, xx01 the remainder.
    csplit -ks "${page}" '%trailstart%' '/trailend/'
    # csplit -ks PmWiki/${base}/${file}.md '%::: {#wikitext}%' '/::: {#wikifoot-links .footnav}/'
    if [[ -f xx00 ]]; then
      rm -- "${page}"
      mv -- xx00 "${page}"
    fi

    # Attempt to clean things up, badly.
    sed -i -E \
      -e 's/\$/\$dlr\$/g' \
      -e 's/\{#.*\}//g' \
      -e '/^:::/d' \
      -e '/\{\.wikilink\}/d' \
      -e '/\[Site$/d' \
      -e '/^Page last modified on /d' \
      -e '/^\[\]/d' \
      -e "s/\`<a id='trailstart'>\`\{=html\}\`<\/a>\`\{=html\}//g" \
      -e "s/^\`<img /<img /g" \
      -e "s/^\`\`\`\{=html\}//g" \
      -e "s/^\`\`\`//g" \
      -e "s/\`\{=html\}//g" \
      -- "${page}"
    # Further clean-up expressions, currently disabled:
    #   -e 's/\{rel=".*\}//g' \
    #   -e 's/\{rel="nofollow"$//g' \
    #   -e 's/^rel="nofollow"\}//g' \
    #   -e 's/^target="_blank"\}//g' \
    #   -e 's/\{\.createlinktext.*\}//g' \
    #   -e 's/\{\.createlinktext$//g' \
    #   -e 's/\{\.createlink.*\}//g' \
    #   -e 's/\{\.createlink$//g' \
    #   -e 's/\{\.urllink.*\}//g' \
    #   -e 's/\{\.urllink$//g'

    #echo "<hr/><p><a href=\"${URL}/?n=${base}.${file}\">Original page</a> where you can edit it.</p>" >> PmWiki/${base}/${file}.md
    # printf keeps backslashes in page names literal, unlike echo -e.
    printf '****\n[Original page](%s/%s/%s) where you can edit it.\n' \
      "${URL}" "${base}" "${file}" >>"${page}"

    # pandoc -t html -f commonmark_x --self-contained PmWiki/${base}/${file}.md > PmWiki//${base}/${file}.htm
    cmark-gfm -t html -e footnotes -e table -e strikethrough "${page}" \
      >"PmWiki/${base}/${file}.body"
  done

notYetAnotherWiki.lua

popd
|