From b21a09b5bfdb263a9f91d3ba642f97196963e09e Mon Sep 17 00:00:00 2001
From: dvs1
Date: Sun, 16 Mar 2025 17:09:50 +1000
Subject: Only suck pages if they have changed, including new pages.
---
SuckIt | 148 ++++++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 86 insertions(+), 62 deletions(-)
diff --git a/SuckIt b/SuckIt
index 7ed0bf6..e548a36 100755
--- a/SuckIt
+++ b/SuckIt
@@ -4,9 +4,9 @@ TIMEFORMAT=" took %lR using %P%% CPU"
time {
pushd /opt/nyaw
-rm -fr Foswiki/*
+#rm -fr Foswiki/*
cp -r /opt/nyaw_EMPTY/Foswiki .
-rm -fr PmWiki/*
+#rm -fr PmWiki/*
cp -r /opt/nyaw_EMPTY/PmWiki .
rm -fr unsorted
mkdir -p unsorted
@@ -33,50 +33,62 @@ do
base=`echo "${line}" | cut -d '/' -f 1`
file=`echo "${line}" | cut -d '/' -f 2- | rev | cut -b 5- | rev`
if [[ ! ${file} =~ (AdminGroup|AdminUser|AdminUserLeftBar|CommentPluginExamples|EditorGroup|GroupTemplate|GroupViewTemplate|NobodyGroup|PatternSkinUserViewTemplate|ProjectContributor|RegistrationAgent|SitePreferences|UnprocessedRegistrations|UnprocessedRegistrationsLog|UserHomepageHeader|UserList|UserListByDateJoined|UserListByLocation|UserList|UserListHeader|WebAtom|WebChanges|WebCreateNewTopic|WebHome|WebIndex|WebLeftBar|WebLeftBarExample|WebNotify|WebPreferences|WebRss|WebSearch|WebSearchAdvanced|WebTopicList|WikiGroups|WikiUsers)$ ]]; then
- realURL=${ogWiki}/${base}/${file}
- time=`date --rfc-3339=seconds -ur /opt/Foswiki/data/${base}/${file}.txt | cut -d '+' -f 1`
- mkdir -p ${ogWiki}/${base}
- mkdir -p ${ogWiki}/${base}/`dirname ${file}`
- echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md
- echo "downloading ${ogURL}/${base}/${file}?cover=print"
- # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time.
- curl --silent --no-progress-meter ${ogURL}/${base}/${file}?cover=print -o ${ogWiki}/${base}/${file}.HTM
- # Attempt to separate user profiles from user content. Doesn't work when people turn their profiles into content.
- dest=""
- if [[ "${base}" == "Main" ]]; then
- dest="unsorted"
- if [ -L users/${file}_fos.md ]; then
- dest='users'
- fi
- mkdir -p `dirname users/${file}`
- sed -i -E ${ogWiki}/${base}/${file}.HTM -e "s%UserForm%%w users/${file}_fos.SED"
- if [ -s users/${file}_fos.SED ]; then
- dest="users"
- fi
- rm users/${file}_fos.SED >/dev/null 2>&1
- rm -d `dirname users/${file}` >/dev/null 2>&1
+ doit='false'
+ if [ ! -s ${ogWiki}/${base}/${file}.HTM ]; then
+ echo "NEW /opt/Foswiki/data/${base}/${file}.txt"
+ doit='true'
+ elif [ /opt/Foswiki/data/${base}/${file}.txt -nt ${ogWiki}/${base}/${file}.HTM ]; then
+ echo "NEWER /opt/Foswiki/data/${base}/${file}.txt"
+ date --rfc-3339=seconds -ur /opt/Foswiki/data/${base}/${file}.txt
+ date --rfc-3339=seconds -ur ${ogWiki}/${base}/${file}.HTM
+ doit='true'
fi
- # "Devuan" is only two pages that get sorted. "Sandbox" is a mixture of standard examples, stuff that was copied to PmWiki, and other things that should get unsorted.
- # Skipping anything with "UnknownUser".
- if [[ "${base}" == "Sandbox" ]]; then
- dest="unsorted"
- mkdir -p `dirname users/${file}`
- sed -i -E ${ogWiki}/${base}/${file}.HTM -e "s%UnknownUser%%w users/${file}_fos.SED"
- if [ -s users/${file}_fos.SED ]; then
- dest=""
+ if [[ ${doit} == "true" ]]; then
+ realURL=${ogWiki}/${base}/${file}
+ time=`date --rfc-3339=seconds -ur /opt/Foswiki/data/${base}/${file}.txt | cut -d '+' -f 1`
+ mkdir -p ${ogWiki}/${base}
+ mkdir -p ${ogWiki}/${base}/`dirname ${file}`
+ echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md
+ echo "downloading ${ogURL}/${base}/${file}?cover=print"
+ # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time.
+ curl --silent --no-progress-meter ${ogURL}/${base}/${file}?cover=print -o ${ogWiki}/${base}/${file}.HTM
+ # Attempt to separate user profiles from user content. Doesn't work when people turn their profiles into content.
+ dest=""
+ if [[ "${base}" == "Main" ]]; then
+ dest="unsorted"
+ if [ -L users/${file}_fos.md ]; then
+ dest='users'
+ fi
+ mkdir -p `dirname users/${file}`
+ sed -i -E ${ogWiki}/${base}/${file}.HTM -e "s%UserForm%%w users/${file}_fos.SED"
+ if [ -s users/${file}_fos.SED ]; then
+ dest="users"
+ fi
+ rm users/${file}_fos.SED >/dev/null 2>&1
+ rm -d `dirname users/${file}` >/dev/null 2>&1
+ fi
+ # "Devuan" is only two pages that get sorted. "Sandbox" is a mixture of standard examples, stuff that was copied to PmWiki, and other things that should get unsorted.
+ # Skipping anything with "UnknownUser".
+ if [[ "${base}" == "Sandbox" ]]; then
+ dest="unsorted"
+ mkdir -p `dirname users/${file}`
+ sed -i -E ${ogWiki}/${base}/${file}.HTM -e "s%UnknownUser%%w users/${file}_fos.SED"
+ if [ -s users/${file}_fos.SED ]; then
+ dest=""
+ fi
+ rm users/${file}_fos.SED >/dev/null 2>&1
+ rm -d `dirname users/${file}` >/dev/null 2>&1
fi
- rm users/${file}_fos.SED >/dev/null 2>&1
- rm -d `dirname users/${file}` >/dev/null 2>&1
- fi
- if [[ "${dest}" != "" ]]; then
- mkdir -p `dirname ${dest}/${file}`
- realURL=${dest}/${file}
- echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}_fos\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md
- touch ${ogWiki}/${base}/${file}.md
- ln -sfr ${ogWiki}/${base}/${file}.md ${dest}/${file}_fos.md
- ln -sfr ${ogWiki}/${base}/${file}.md.md ${dest}/${file}_fos.md.md
- rm ${ogWiki}/${base}/${file}.md
+ if [[ "${dest}" != "" ]]; then
+ mkdir -p `dirname ${dest}/${file}`
+ realURL=${dest}/${file}
+ echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}_fos\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md
+ touch ${ogWiki}/${base}/${file}.md
+ ln -sfr ${ogWiki}/${base}/${file}.md ${dest}/${file}_fos.md
+ ln -sfr ${ogWiki}/${base}/${file}.md.md ${dest}/${file}_fos.md.md
+ rm ${ogWiki}/${base}/${file}.md
+ fi
fi
fi
done
@@ -99,26 +111,38 @@ do
base=`echo "${line}" | cut -d '.' -f 1`
file=`echo "${line}" | cut -d '.' -f 2`
if [[ "${base}" != "Site" ]]; then
- realURL=${ogWiki}/${base}/${file}
- time=`date --rfc-3339=seconds -ur /opt/pmwiki/wiki.d/${base}.${file} | cut -d '+' -f 1`
- mkdir -p ${ogWiki}/${base}
- echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md
-# echo "downloading ${ogURL}/?n=${base}.${file}?action=markdown"
-# curl --no-progress-meter ${ogURL}/?n=${base}.${file}?action=markdown -o ${ogWiki}/${base}/${file}.MARKDOWN
- echo "downloading ${ogURL}/?n=${base}.${file}?action=print"
- curl --no-progress-meter ${ogURL}/?n=${base}.${file}?action=print -o ${ogWiki}/${base}/${file}.HTM
- # Seems there's no way to tell user profiles apart from user content. Unless I can find a list of users somewhere. Don't think there is one.
- if [[ "${base}" == "Profiles" ]]; then
- dest="unsorted"
- if [ -L users/${file}_pm.md ]; then
- dest='users'
+ doit='false'
+ if [ ! -s ${ogWiki}/${base}/${file}.HTM ]; then
+ echo "NEW /opt/pmwiki/wiki.d/${base}.${file} ${ogWiki}/${base}/${file}.HTM"
+ doit='true'
+ elif [ /opt/pmwiki/wiki.d/${base}.${file} -nt ${ogWiki}/${base}/${file}.HTM ]; then
+ echo "NEWER /opt/pmwiki/wiki.d/${base}.${file}"
+ date --rfc-3339=seconds -ur /opt/pmwiki/wiki.d/${base}.${file}
+ date --rfc-3339=seconds -ur ${ogWiki}/${base}/${file}.HTM
+ doit='true'
+ fi
+ if [[ ${doit} == "true" ]]; then
+ realURL=${ogWiki}/${base}/${file}
+ time=`date --rfc-3339=seconds -ur /opt/pmwiki/wiki.d/${base}.${file} | cut -d '+' -f 1`
+ mkdir -p ${ogWiki}/${base}
+ echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md
+# echo "downloading ${ogURL}/?n=${base}.${file}?action=markdown"
+# curl --no-progress-meter ${ogURL}/?n=${base}.${file}?action=markdown -o ${ogWiki}/${base}/${file}.MARKDOWN
+ echo "downloading ${ogURL}/?n=${base}.${file}?action=print"
+ curl --no-progress-meter ${ogURL}/?n=${base}.${file}?action=print -o ${ogWiki}/${base}/${file}.HTM
+ # Seems there's no way to tell user profiles apart from user content. Unless I can find a list of users somewhere. Don't think there is one.
+ if [[ "${base}" == "Profiles" ]]; then
+ dest="unsorted"
+ if [ -L users/${file}_pm.md ]; then
+ dest='users'
+ fi
+ realURL=${dest}/${file}
+ echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}_pm\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md
+ touch ${ogWiki}/${base}/${file}.md
+ ln -sfr ${ogWiki}/${base}/${file}.md ${dest}/${file}_pm.md
+ ln -sfr ${ogWiki}/${base}/${file}.md.md ${dest}/${file}_pm.md.md
+ rm ${ogWiki}/${base}/${file}.md
fi
- realURL=${dest}/${file}
- echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}_pm\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md
- touch ${ogWiki}/${base}/${file}.md
- ln -sfr ${ogWiki}/${base}/${file}.md ${dest}/${file}_pm.md
- ln -sfr ${ogWiki}/${base}/${file}.md.md ${dest}/${file}_pm.md.md
- rm ${ogWiki}/${base}/${file}.md
fi
# TODO - groups are PmWiki/Onefang and PmWiki/Tiki
--
cgit v1.1