From 2252391159b7cc23b954b699c9af895bb23a1cc4 Mon Sep 17 00:00:00 2001
From: "Yann Esposito (Yogsototh)"
Date: Fri, 7 May 2021 18:02:58 +0200
Subject: [PATCH] Try to parallelize RSS building better

Build one cached RSS fragment per post instead of regenerating every
entry inside engine/mkrss.sh:

* Makefile: add CACHE_DIR and a pattern rule that turns
  $(DST_DIR)/posts/%.html into $(RSS_CACHE_DIR)/%.rss by running
  engine/mk-rss-entry.sh. $(RSS) now depends on those fragments, so
  each fragment is an independent target. `make clean` also empties
  $(CACHE_DIR).
* engine/mk-rss-entry.sh (new): takes the HTML page as $1 and the
  fragment to write as $2 (default: $indexdir/$1.rss).
* engine/mkrss.sh: no longer parses the HTML pages; it copies the
  cached fragments into a temporary directory under a name prefixed
  with their ISO date so they can be sorted.
---
Review notes (not part of the commit message):

Fixed in place, line counts unchanged:
 - Makefile: DST_RSS_FILES expanded $(SRC_POSTS_FILES); the variable
   that is defined is SRC_POST_FILES.
 - Makefile: the recipe passed only $@ although the script reads an
   HTML page from $1; it now passes "$< $@" and creates $(@D).
 - mk-rss-entry.sh: `continue` outside of a loop replaced by `exit 0`;
   the fragment is written with `>` instead of `>>` so a rebuild does
   not duplicate the item.
 - mkrss.sh: finddate is called on $fic ($xfic is no longer assigned),
   $d is computed before it is printed, and finddate no longer cuts
   the pubDate at the first space.

To confirm:
 - find uses "*$(EXT)" while subst uses ".$(EXT)"; EXT is defined
   outside this patch, so only one of the two can match.
 - The printf format strings are reproduced exactly as received; they
   look like they lost their XML element names in transit.
 - Blank context lines and indentation were reconstructed from the
   hunk line counts; the index hashes were not recomputed.

 Makefile               | 18 ++++++++++-
 engine/mk-rss-entry.sh | 72 ++++++++++++++++++++++++++++++++++++++++++
 engine/mkrss.sh        | 57 +++++++++------------------------
 3 files changed, 104 insertions(+), 43 deletions(-)
 create mode 100755 engine/mk-rss-entry.sh

diff --git a/Makefile b/Makefile
index e804cec..ab20643 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ all: fast
 
 SRC_DIR ?= src
 DST_DIR ?= _site
+CACHE_DIR ?= .cache
 
 # we don't want to publish files in drafts
 NO_DRAFT := -not -path '$(SRC_DIR)/drafts/*'
@@ -44,6 +45,8 @@ $(DST_DIR)/%.html: $(SRC_DIR)/%.org $(TEMPLATE)
 	@rm $@.tmp
 ALL += $(DST_PANDOC_FILES)
 
+
+
 # HTML INDEX
 HTML_INDEX := $(DST_DIR)/index.html
 MKINDEX := engine/mk-index.sh
@@ -53,9 +56,21 @@ $(HTML_INDEX): $(DST_PANDOC_FILES) $(MKINDEX)
 ALL += $(HTML_INDEX)
 
 # RSS
+
+SRC_POSTS_DIR ?= $(SRC_DIR)/posts
+SRC_POST_FILES ?= $(shell find $(SRC_POSTS_DIR) -type f -name "*$(EXT)")
+RSS_CACHE_DIR ?= $(CACHE_DIR)/rss
+DST_RSS_FILES ?= $(subst .$(EXT),.rss, \
+    $(patsubst $(SRC_POSTS_DIR)/%,$(RSS_CACHE_DIR)/%, \
+        $(SRC_POST_FILES)))
+MK_RSS_ENTRY := ./engine/mk-rss-entry.sh
+$(RSS_CACHE_DIR)/%.rss: $(DST_DIR)/posts/%.html $(MK_RSS_ENTRY)
+	@mkdir -p $(@D)
+	$(MK_RSS_ENTRY) $< $@
+
 RSS := $(DST_DIR)/rss.xml
 MKRSS := engine/mkrss.sh
-$(RSS): $(DST_PANDOC_FILES) $(MKRSS)
+$(RSS): $(DST_RSS_FILES) $(MKRSS)
 	$(MKRSS)
 ALL += $(RSS)
 
@@ -119,3 +134,4 @@ fast: $(ALL)
 
 clean:
 	-[ ! -z "$(DST_DIR)" ] && rm -rf $(DST_DIR)/*
+	-[ ! -z "$(CACHE_DIR)" ] && rm -rf $(CACHE_DIR)/*
diff --git a/engine/mk-rss-entry.sh b/engine/mk-rss-entry.sh
new file mode 100755
index 0000000..4a6f26c
--- /dev/null
+++ b/engine/mk-rss-entry.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env zsh
+
+cd "$(git rev-parse --show-toplevel)" || exit 1
+# Directory
+webdir="_site"
+postsdir="$webdir/posts"
+indexdir=".cache/rss"
+
+# HTML file to handle ($1); the fragment path to write is the optional $2
+fic="$1"
+
+# RSS Metas
+websiteurl="https://her.esy.fun"
+
+# HTML Accessors (similar to CSS accessors)
+dateaccessor='.yyydate'
+contentaccessor='#content'
+# title and keyword shouldn't be changed
+titleaccessor='title'
+keywordsaccessor='meta[name=keywords]::attr(content)'
+
+formatdate() {
+    # format the date for RSS
+    local d="$1"
+    # echo "DEBUG DATE: $d" >&2
+    LC_TIME=en_US date --date $d +'%a, %d %b %Y %H:%M:%S %z'
+}
+
+finddate(){ < $1 hxselect -c $dateaccessor | sed 's/\[//g;s/\]//g;s/ .*$//' }
+findtitle(){ < $1 hxselect -c $titleaccessor }
+getcontent(){
+    < $1 hxselect $contentaccessor | \
+    perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
+findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,/ /g' }
+
+mkcategories(){
+    for keyword in $*; do
+        printf "\\n%s" $keyword
+    done
+}
+
+autoload -U colors && colors
+
+if echo $fic|egrep -- '-(mk|min|sci|modern).html$'>/dev/null; then
+    exit 0
+fi
+
+postfile="$(echo "$fic"|sed 's#^'$postsdir'/##')"
+blogfile="$(echo "$fic"|sed 's#^'$webdir'/##')"
+printf "%-30s" $postfile
+xfic="$indexdir/$fic.xml"
+mkdir -p $(dirname $xfic)
+hxclean $fic > $xfic
+d=$(finddate $xfic)
+echo -n " [$d]"
+rssdate=$(formatdate $d)
+title=$(findtitle $xfic)
+keywords=( $(findkeywords $xfic) )
+printf ": %-55s" "$title ($keywords)"
+categories=$(mkcategories $keywords)
+absoluteurl="${websiteurl}/${blogfile}"
+dst="${2:-$indexdir/$fic.rss}"
+mkdir -p $(dirname $dst)
+{ printf "\\n"
+  printf "\\n%s" "$title"
+  printf "\\n%s" "$absoluteurl"
+  printf "\\n%s%s" "$rssdate"
+  printf "%s" "$categories"
+  printf "\\n" "$(getcontent "$xfic" "$absoluteurl")"
+  printf "\\n\\n\\n"
+} > "$dst"
+echo " [${fg[green]}OK${reset_color}]"
diff --git a/engine/mkrss.sh b/engine/mkrss.sh
index 8a8bcbf..f53253a 100755
--- a/engine/mkrss.sh
+++ b/engine/mkrss.sh
@@ -5,6 +5,7 @@ cd "$(git rev-parse --show-toplevel)" || exit 1
 webdir="_site"
 postsdir="$webdir/posts"
 rssfile="$webdir/rss.xml"
+indexdir=".cache/rss"
 
 # maximal number of articles to put in the RSS file
 maxarticles=10
@@ -19,11 +20,7 @@ rssauthor="yann@esposito.host (Yann Esposito)"
 rssimgurl="https://her.esy.fun/img/FlatAvatar.png"
 
 # HTML Accessors (similar to CSS accessors)
-dateaccessor='.yyydate'
-contentaccessor='#content'
-# title and keyword shouldn't be changed
-titleaccessor='title'
-keywordsaccessor='meta[name=keywords]::attr(content)'
+dateaccessor='pubDate'
 
 formatdate() {
     # format the date for RSS
@@ -32,51 +29,27 @@ formatdate() {
     LC_TIME=en_US date --date $d +'%a, %d %b %Y %H:%M:%S %z'
 }
 
-finddate(){ < $1 hxselect -c $dateaccessor | sed 's/\[//g;s/\]//g;s/ .*$//' }
-findtitle(){ < $1 hxselect -c $titleaccessor }
-getcontent(){
-    < $1 hxselect $contentaccessor | \
-    perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
-findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,/ /g' }
-mkcategories(){
-    for keyword in $*; do
-        printf "\\n%s" $keyword
-    done
+isodate() {
+    # format the date for sorting
+    local d="$1"
+    # echo "DEBUG DATE: $d" >&2
+    LC_TIME=en_US date --date $d +'%Y-%m-%dT%H:%M:%S'
 }
 
+finddate(){ < $1 hxselect -c $dateaccessor }
+
 autoload -U colors && colors
 
-tmpdir=$(mktemp -d)
 typeset -a dates
 dates=( )
-for fic in $postsdir/**/*.html; do
-    if echo $fic|egrep -- '-(mk|min|sci|modern).html$'>/dev/null; then
-        continue
-    fi
-    postfile="$(echo "$fic"|sed 's#^'$postsdir'/##')"
-    blogfile="$(echo "$fic"|sed 's#^'$webdir'/##')"
-    printf "%-30s" $postfile
-    xfic="$tmpdir/$fic.xml"
-    mkdir -p $(dirname $xfic)
-    hxclean $fic > $xfic
-    d=$(finddate $xfic)
-    echo -n " [$d]"
-    rssdate=$(formatdate $d)
-    title=$(findtitle $xfic)
-    keywords=( $(findkeywords $xfic) )
-    printf ": %-55s" "$title ($keywords)"
-    categories=$(mkcategories $keywords)
-    absoluteurl="${websiteurl}/${blogfile}"
-    { printf "\\n"
-      printf "\\n%s" "$title"
-      printf "\\n%s" "$absoluteurl"
-      printf "\\n%s%s" "$rssdate"
-      printf "%s" "$categories"
-      printf "\\n" "$(getcontent "$xfic" "$absoluteurl")"
-      printf "\\n\\n\\n"
-    } >> "$tmpdir/${d}-$(basename $fic).rss"
+tmpdir=$(mktemp -d)
+for fic in $indexdir/*.rss; do
+    rssdate=$(finddate $fic)
+    d=$(isodate $rssdate)
+    echo -n "${fic:t} [$d]"
     dates=( $d $dates )
     echo " [${fg[green]}OK${reset_color}]"
+    cp $fic $tmpdir/$d-${fic:t}.rss
 done
 echo "Publishing"
 for fic in $(ls $tmpdir/*.rss | sort -r | head -n $maxarticles ); do