diff options
Diffstat (limited to 'SuckItPm')
-rwxr-xr-x | SuckItPm | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/SuckItPm b/SuckItPm new file mode 100755 index 0000000..6232cdc --- /dev/null +++ b/SuckItPm | |||
@@ -0,0 +1,57 @@ | |||
#!/bin/bash
#
# SuckItPm - mirror a PmWiki site as Markdown.
#
# For every page file "Group.Page" found under ../pmwiki/wiki.d this script:
#   1. downloads ${URL}/?n=Group.Page with curl into PmWiki/Group/Page.HTM,
#   2. converts that HTML to commonmark_x with pandoc (PmWiki/Group/Page.md),
#      keeping an untouched copy as Page.md_ORIGINAL,
#   3. drops everything before the "::: {#wikitext}" marker,
#   4. strips leftover pandoc attribute noise with sed,
#   5. appends a link back to the original wiki page.
# Finally it runs notYetAnotherWiki.lua.
#
# Must be run from the directory that should contain PmWiki/, with
# ../pmwiki/wiki.d reachable from there. csplit writes its scratch file xx00
# into the current directory.

URL="https://wiki.devuan.org"

# find(1) predicates excluding backup files and PmWiki housekeeping files.
# Kept as an array so each pattern reaches find as a single literal word; a
# plain string expanded unquoted would let the shell glob "*~" against the
# current directory first. The trailing -a joins onto the -name test below.
filter=(
  -not -name "*~" -a
  -not -name ".flock" -a
  -not -name ".htaccess" -a
  -not -name ".lastmod" -a
  -not -name ".pageindex" -a
)

find ../pmwiki/wiki.d "${filter[@]}" \
  -name "*.*" -type f,l -printf "%P\n" | while IFS= read -r line
do
  # Page files are named Group.Page: the first dot-separated field is the
  # group, the second is the page name (any further fields are ignored).
  base=${line%%.*}
  file=${line#*.}
  file=${file%%.*}

  page_url="${URL}/?n=${base}.${file}"
  out="PmWiki/${base}/${file}"

  mkdir -p "PmWiki/${base}"
  echo "Converting ${page_url} -> ${out}.md"
  # Earlier approach, letting pandoc fetch the page itself:
  # pandoc -f html -t markdown --self-contained "${page_url}" >"${out}.md"
  # TODO - try curl, to see what is actually downloaded, and maybe not
  # download unchanged pages. curl to .HTM
  # That doesn't help with re-downloads, because a dynamic site isn't cached,
  # but the curl command can at least be commented out during testing to
  # save time.
  # The URL is quoted because it contains "?", which is a glob character.
  curl --no-progress-meter "${page_url}" -o "${out}.HTM"
  pandoc -f html -t commonmark_x --self-contained "${out}.HTM" >"${out}.md"
  cp "${out}.md" "${out}.md_ORIGINAL"

  # Keep only the part of the page from the "::: {#wikitext}" line onwards.
  # csplit writes that remainder to xx00; if the marker is absent no xx00 is
  # produced and the page is left as it is. Any stale xx00 is removed first so
  # that a leftover from an earlier run can never be mistaken for this page.
  # Alternative split that was tried:
  # csplit -ks "${out}.md" '/trailstart/' '/trailend/'
  rm -f xx00
  csplit -ks "${out}.md" '%::: {#wikitext}%'
  if [ -f xx00 ]; then
    mv -f xx00 "${out}.md"
  fi

  # Attempt to clean things up, badly.
  # Rewrites "$", then removes pandoc attribute blocks ({#id}, {rel=...},
  # {.createlink...}, {.urllink...}), fenced-div markers (":::"), wikilink
  # lines and the "Page last modified on" footer. The file operand comes last
  # so the command does not depend on GNU option permutation.
  sed -i -E \
    -e 's/\$/\$dlr\$/g' \
    -e 's/\{#.*\}//g' \
    -e '/\{\.wikilink\}/d' \
    -e '/\[Site$/d' \
    -e '/^:::/d' \
    -e '/^Page last modified on /d' \
    -e '/^\[\]/d' \
    -e 's/\{rel=".*\}//g' \
    -e 's/\{rel="nofollow"$//g' \
    -e 's/^rel="nofollow"\}//g' \
    -e 's/^target="_blank"\}//g' \
    -e 's/\{\.createlinktext.*\}//g' \
    -e 's/\{\.createlinktext$//g' \
    -e 's/\{\.createlink.*\}//g' \
    -e 's/\{\.createlink$//g' \
    -e 's/\{\.urllink.*\}//g' \
    -e 's/\{\.urllink$//g' \
    -- "${out}.md"

  echo "<hr/><p><a href=\"${page_url}\">Original page</a> where you can edit it.</p>" >> "${out}.md"
done

# NOTE(review): presumably the generator that consumes the PmWiki/ tree built
# above; it is resolved via PATH and is not visible here - confirm.
notYetAnotherWiki.lua