aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/SuckItFos
blob: b59f6b0d9acf70622c27fdafe544d53a014f7c52 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/bin/bash

# Base address of the live Foswiki site; topic paths are appended to it.
URL='https://fos.wiki.devuan.org'

# Extra find(1) arguments, kept in one string that is meant to be expanded
# UNQUOTED so it word-splits into separate arguments.  Each listed name gets a
# "-name NAME -prune -o" clause, so find does not descend into entries with
# that name.  The string deliberately ends in "-o" so the caller's own tests
# follow on directly.
filter=$'\n'
for pruned_name in _default _empty System Trash TWiki
do
    filter+=" -name ${pruned_name} -prune -o "
done
unset pruned_name

# Mirror every Foswiki topic as Markdown under /opt/merged, then build the site.
#
# For each *.txt topic found under /opt/Foswiki/data (minus whatever ${filter}
# prunes) the loop:
#   1. downloads the print view of the page from ${URL} into Foswiki/<path>.HTM,
#   2. trims the HTML down to the main content and strips Foswiki-only attributes,
#   3. converts it to CommonMark with pandoc and trims/cleans the Markdown,
#   4. appends a link back to the original page,
#   5. links the result into combined/<path>.md.
# Untouched copies are kept next to each file as *.HTM_ORIGINAL / *.md_ORIGINAL.
# Finally notYetAnotherWiki.lua is run from /opt/merged.

# Stop here if the output directory is unavailable; otherwise everything below
# would be written into whatever directory the script was started from.
pushd /opt/merged || exit 1

# trim_to_section FILE SKIP_PATTERN CUT_PATTERN
#
# Cut FILE down, in place, using csplit: SKIP_PATTERN (a '%regex%' argument)
# discards everything before its match, CUT_PATTERN (a '/regex/' argument)
# ends the kept piece.  The kept piece is csplit's first output file, xx00.
# FILE is left unchanged when csplit produces no xx00.
trim_to_section() {
    local target=$1 skip_pattern=$2 cut_pattern=$3

    # A leftover xx00 from an interrupted run must not be mistaken for the
    # output of this csplit call.
    rm -f xx00 xx01
    csplit -ks "${target}" "${skip_pattern}" "${cut_pattern}"
    if [ -f xx00 ]; then
        mv -f -- xx00 "${target}"
    fi
    # The remainder after CUT_PATTERN is not wanted; do not leave it lying around.
    rm -f xx01
}

# ${filter} holds several find arguments in one string and must word-split.
# shellcheck disable=SC2086
find /opt/Foswiki/data ${filter} \
    -name "*.txt" -type f,l -printf "%P\n" | while IFS= read -r line
do
    # line is relative to the data directory, e.g. "Web/Sub/Topic.txt":
    # base is the first path component, file is the rest without ".txt".
    base=${line%%/*}
    file=${line#*/}
    file=${file%.txt}
    page="Foswiki/${base}/${file}"
    merged="combined/${base}/${file}"

    mkdir -p "${page%/*}" "${merged%/*}"
    echo "Converting ${URL}/${base}/${file}?cover=print -> Foswiki/${base}/${file}.md"

    # Earlier approach, letting pandoc fetch the page itself:
    #    pandoc -f html -t markdown --self-contained ${URL}/${base}/${file} >Foswiki/${base}/${file}.md
    # TODO - find a way to avoid re-downloading unchanged pages.  The site is
    # dynamic and so not cached, but fetching with curl at least allows this
    # command to be commented out during testing to save time.
    # The URL is quoted: its '?' would otherwise be treated as a glob character.
    curl --silent --no-progress-meter "${URL}/${base}/${file}?cover=print" -o "${page}.HTM" \
        || echo "warning: download of ${URL}/${base}/${file} failed" >&2

    cp "${page}.HTM" "${page}.HTM_ORIGINAL"
    trim_to_section "${page}.HTM" \
        '%<div id="patternMainContents">%' \
        '/<div class="foswikiAttachments foswikiFormStep" style="overflow:auto">/'

    # Drop attributes that only clutter the converted output.  The topic
    # container's class is renamed first so that it survives the blanket
    # removal of foswiki* classes and can be used as a marker further down.
    sed -i -E \
        -e "s/rel='nofollow'//g" \
        -e 's/rel="nofollow"//g' \
        -e "s/target='_blank'//g" \
        -e "s/class='foswiki[[:alpha:]]*'//g" \
        -e 's/class="foswikiTopic"/class="FoswikiTopic"/g' \
        -e 's/class="foswiki[[:alpha:]]*"//g' \
        -e "s/style='.*;'//g" \
        -- "${page}.HTM"
    # Narrower alternatives to the style rule above, currently unused:
    #	-e "s/style='background-color: #.*;'//g" \
    #	-e "s/style='font-size: .*;'//g"

    pandoc -f html -t commonmark_x --self-contained "${page}.HTM" >"${page}.md"
    cp "${page}.md" "${page}.md_ORIGINAL"

    # Previously tried markers:
    #    csplit -ks Foswiki/${base}/${file}.md '%::: {.foswikiTopic}%' '/::: {.foswikiContentFooter}/'
    trim_to_section "${page}.md" '%::: {.FoswikiTopic}%' '/::: {.patternInfo}/'

    # Attempt to clean things up, badly: every "$" becomes "$dlr$", and
    # {#...} / {.pattern...} attribute blocks, "<!-- -->" lines and ":::"
    # fence lines are removed.
    sed -i -E \
        -e 's/\$/\$dlr\$/g' \
        -e 's/\{#.*\}//g' \
        -e 's/\{\.pattern.*\}//g' \
        -e 's/\{\.pattern.*//g' \
        -e '/^<!-- -->/d' \
        -e '/^:::/d' \
        -- "${page}.md"
    printf '****\n[Original page](%s/%s/%s) where maybe you can edit it.\n' \
        "${URL}" "${base}" "${file}" >>"${page}.md"

    # Disabled HTML renderings of the Markdown, kept for reference:
    #    pandoc -t html -f commonmark_x --self-contained		Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.htm
    #    cmark-gfm -t html -e footnotes -e table -e strikethrough	Foswiki/${base}/${file}.md > Foswiki/${base}/${file}.body
    #    ln -frs Foswiki/${base}/${file}.body combined/${base}/${file}.body
    ln -frs "${page}.md" "${merged}.md"
done

notYetAnotherWiki.lua

popd