From b21a09b5bfdb263a9f91d3ba642f97196963e09e Mon Sep 17 00:00:00 2001 From: dvs1 Date: Sun, 16 Mar 2025 17:09:50 +1000 Subject: Only suck pages if they have changed, including new pages. --- SuckIt | 148 ++++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 86 insertions(+), 62 deletions(-) diff --git a/SuckIt b/SuckIt index 7ed0bf6..e548a36 100755 --- a/SuckIt +++ b/SuckIt @@ -4,9 +4,9 @@ TIMEFORMAT=" took %lR using %P%% CPU" time { pushd /opt/nyaw -rm -fr Foswiki/* +#rm -fr Foswiki/* cp -r /opt/nyaw_EMPTY/Foswiki . -rm -fr PmWiki/* +#rm -fr PmWiki/* cp -r /opt/nyaw_EMPTY/PmWiki . rm -fr unsorted mkdir -p unsorted @@ -33,50 +33,62 @@ do base=`echo "${line}" | cut -d '/' -f 1` file=`echo "${line}" | cut -d '/' -f 2- | rev | cut -b 5- | rev` if [[ ! ${file} =~ (AdminGroup|AdminUser|AdminUserLeftBar|CommentPluginExamples|EditorGroup|GroupTemplate|GroupViewTemplate|NobodyGroup|PatternSkinUserViewTemplate|ProjectContributor|RegistrationAgent|SitePreferences|UnprocessedRegistrations|UnprocessedRegistrationsLog|UserHomepageHeader|UserList|UserListByDateJoined|UserListByLocation|UserList|UserListHeader|WebAtom|WebChanges|WebCreateNewTopic|WebHome|WebIndex|WebLeftBar|WebLeftBarExample|WebNotify|WebPreferences|WebRss|WebSearch|WebSearchAdvanced|WebTopicList|WikiGroups|WikiUsers)$ ]]; then - realURL=${ogWiki}/${base}/${file} - time=`date --rfc-3339=seconds -ur /opt/Foswiki/data/${base}/${file}.txt | cut -d '+' -f 1` - mkdir -p ${ogWiki}/${base} - mkdir -p ${ogWiki}/${base}/`dirname ${file}` - echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md - echo "downloading ${ogURL}/${base}/${file}?cover=print" - # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. - curl --silent --no-progress-meter ${ogURL}/${base}/${file}?cover=print -o ${ogWiki}/${base}/${file}.HTM - # Attempt to separate user profiles from user content. Doesn't work when people turn their profiles into content. - dest="" - if [[ "${base}" == "Main" ]]; then - dest="unsorted" - if [ -L users/${file}_fos.md ]; then - dest='users' - fi - mkdir -p `dirname users/${file}` - sed -i -E ${ogWiki}/${base}/${file}.HTM -e "s%UserForm%%w users/${file}_fos.SED" - if [ -s users/${file}_fos.SED ]; then - dest="users" - fi - rm users/${file}_fos.SED >/dev/null 2>&1 - rm -d `dirname users/${file}` >/dev/null 2>&1 + doit='false' + if [ ! -s ${ogWiki}/${base}/${file}.HTM ]; then + echo "NEW /opt/Foswiki/data/${base}/${file}.txt" + doit='true' + elif [ /opt/Foswiki/data/${base}/${file}.txt -nt ${ogWiki}/${base}/${file}.HTM ]; then + echo "NEWER /opt/Foswiki/data/${base}/${file}.txt" + date --rfc-3339=seconds -ur /opt/Foswiki/data/${base}/${file}.txt + date --rfc-3339=seconds -ur ${ogWiki}/${base}/${file}.HTM + doit='true' fi - # "Devuan" is only two pages that get sorted. "Sandbox" is a mixture of standard examples, stuff that was copied to PmWiki, and other things that should get unsorted. - # Skipping anything with "UnknownUser". - if [[ "${base}" == "Sandbox" ]]; then - dest="unsorted" - mkdir -p `dirname users/${file}` - sed -i -E ${ogWiki}/${base}/${file}.HTM -e "s%UnknownUser%%w users/${file}_fos.SED" - if [ -s users/${file}_fos.SED ]; then - dest="" + if [[ ${doit} == "true" ]]; then + realURL=${ogWiki}/${base}/${file} + time=`date --rfc-3339=seconds -ur /opt/Foswiki/data/${base}/${file}.txt | cut -d '+' -f 1` + mkdir -p ${ogWiki}/${base} + mkdir -p ${ogWiki}/${base}/`dirname ${file}` + echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md + echo "downloading ${ogURL}/${base}/${file}?cover=print" + # Doesn't help with redownloads, coz natch a dynamic site isn't cached. But I can at least comment out the curl command during testing to save time. + curl --silent --no-progress-meter ${ogURL}/${base}/${file}?cover=print -o ${ogWiki}/${base}/${file}.HTM + # Attempt to separate user profiles from user content. Doesn't work when people turn their profiles into content. + dest="" + if [[ "${base}" == "Main" ]]; then + dest="unsorted" + if [ -L users/${file}_fos.md ]; then + dest='users' + fi + mkdir -p `dirname users/${file}` + sed -i -E ${ogWiki}/${base}/${file}.HTM -e "s%UserForm%%w users/${file}_fos.SED" + if [ -s users/${file}_fos.SED ]; then + dest="users" + fi + rm users/${file}_fos.SED >/dev/null 2>&1 + rm -d `dirname users/${file}` >/dev/null 2>&1 + fi + # "Devuan" is only two pages that get sorted. "Sandbox" is a mixture of standard examples, stuff that was copied to PmWiki, and other things that should get unsorted. + # Skipping anything with "UnknownUser". + if [[ "${base}" == "Sandbox" ]]; then + dest="unsorted" + mkdir -p `dirname users/${file}` + sed -i -E ${ogWiki}/${base}/${file}.HTM -e "s%UnknownUser%%w users/${file}_fos.SED" + if [ -s users/${file}_fos.SED ]; then + dest="" + fi + rm users/${file}_fos.SED >/dev/null 2>&1 + rm -d `dirname users/${file}` >/dev/null 2>&1 fi - rm users/${file}_fos.SED >/dev/null 2>&1 - rm -d `dirname users/${file}` >/dev/null 2>&1 - fi - if [[ "${dest}" != "" ]]; then - mkdir -p `dirname ${dest}/${file}` - realURL=${dest}/${file} - echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}_fos\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md - touch ${ogWiki}/${base}/${file}.md - ln -sfr ${ogWiki}/${base}/${file}.md ${dest}/${file}_fos.md - ln -sfr ${ogWiki}/${base}/${file}.md.md ${dest}/${file}_fos.md.md - rm ${ogWiki}/${base}/${file}.md + if [[ "${dest}" != "" ]]; then + mkdir -p `dirname ${dest}/${file}` + realURL=${dest}/${file} + echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}_fos\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md + touch ${ogWiki}/${base}/${file}.md + ln -sfr ${ogWiki}/${base}/${file}.md ${dest}/${file}_fos.md + ln -sfr ${ogWiki}/${base}/${file}.md.md ${dest}/${file}_fos.md.md + rm ${ogWiki}/${base}/${file}.md + fi fi fi done @@ -99,26 +111,38 @@ do base=`echo "${line}" | cut -d '.' -f 1` file=`echo "${line}" | cut -d '.' -f 2` if [[ "${base}" != "Site" ]]; then - realURL=${ogWiki}/${base}/${file} - time=`date --rfc-3339=seconds -ur /opt/pmwiki/wiki.d/${base}.${file} | cut -d '+' -f 1` - mkdir -p ${ogWiki}/${base} - echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md -# echo "downloading ${ogURL}/?n=${base}.${file}?action=markdown" -# curl --no-progress-meter ${ogURL}/?n=${base}.${file}?action=markdown -o ${ogWiki}/${base}/${file}.MARKDOWN - echo "downloading ${ogURL}/?n=${base}.${file}?action=print" - curl --no-progress-meter ${ogURL}/?n=${base}.${file}?action=print -o ${ogWiki}/${base}/${file}.HTM - # Seems there's no way to tell user profiles apart from user content. Unless I can find a list of users somewhere. Don't think there is one. - if [[ "${base}" == "Profiles" ]]; then - dest="unsorted" - if [ -L users/${file}_pm.md ]; then - dest='users' + doit='false' + if [ ! -s ${ogWiki}/${base}/${file}.HTM ]; then + echo "NEW /opt/pmwiki/wiki.d/${base}.${file} ${ogWiki}/${base}/${file}.HTM" + doit='true' + elif [ /opt/pmwiki/wiki.d/${base}.${file} -nt ${ogWiki}/${base}/${file}.HTM ]; then + echo "NEWER /opt/pmwiki/wiki.d/${base}.${file}" + date --rfc-3339=seconds -ur /opt/pmwiki/wiki.d/${base}.${file} + date --rfc-3339=seconds -ur ${ogWiki}/${base}/${file}.HTM + doit='true' + fi + if [[ ${doit} == "true" ]]; then + realURL=${ogWiki}/${base}/${file} + time=`date --rfc-3339=seconds -ur /opt/pmwiki/wiki.d/${base}.${file} | cut -d '+' -f 1` + mkdir -p ${ogWiki}/${base} + echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md +# echo "downloading ${ogURL}/?n=${base}.${file}?action=markdown" +# curl --no-progress-meter ${ogURL}/?n=${base}.${file}?action=markdown -o ${ogWiki}/${base}/${file}.MARKDOWN + echo "downloading ${ogURL}/?n=${base}.${file}?action=print" + curl --no-progress-meter ${ogURL}/?n=${base}.${file}?action=print -o ${ogWiki}/${base}/${file}.HTM + # Seems there's no way to tell user profiles apart from user content. Unless I can find a list of users somewhere. Don't think there is one. + if [[ "${base}" == "Profiles" ]]; then + dest="unsorted" + if [ -L users/${file}_pm.md ]; then + dest='users' + fi + realURL=${dest}/${file} + echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}_pm\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md + touch ${ogWiki}/${base}/${file}.md + ln -sfr ${ogWiki}/${base}/${file}.md ${dest}/${file}_pm.md + ln -sfr ${ogWiki}/${base}/${file}.md.md ${dest}/${file}_pm.md.md + rm ${ogWiki}/${base}/${file}.md fi - realURL=${dest}/${file} - echo -e "ogWiki=${ogWiki}\nogURL=${ogURL}\nrealURL=${realURL}_pm\nogBase=${base}\nogFile=${file}\ntimestamp=${time}\n" > ${ogWiki}/${base}/${file}.md.md - touch ${ogWiki}/${base}/${file}.md - ln -sfr ${ogWiki}/${base}/${file}.md ${dest}/${file}_pm.md - ln -sfr ${ogWiki}/${base}/${file}.md.md ${dest}/${file}_pm.md.md - rm ${ogWiki}/${base}/${file}.md fi # TODO - groups are PmWiki/Onefang and PmWiki/Tiki -- cgit v1.1