Try to parallelize RSS building better
This commit is contained in:
parent
1e727b5d2b
commit
2252391159
18
Makefile
18
Makefile
|
@ -7,6 +7,7 @@
|
||||||
all: fast
|
all: fast
|
||||||
SRC_DIR ?= src
|
SRC_DIR ?= src
|
||||||
DST_DIR ?= _site
|
DST_DIR ?= _site
|
||||||
|
CACHE_DIR ?= .cache
|
||||||
|
|
||||||
# we don't want to publish files in drafts
|
# we don't want to publish files in drafts
|
||||||
NO_DRAFT := -not -path '$(SRC_DIR)/drafts/*'
|
NO_DRAFT := -not -path '$(SRC_DIR)/drafts/*'
|
||||||
|
@ -44,6 +45,8 @@ $(DST_DIR)/%.html: $(SRC_DIR)/%.org $(TEMPLATE)
|
||||||
@rm $@.tmp
|
@rm $@.tmp
|
||||||
ALL += $(DST_PANDOC_FILES)
|
ALL += $(DST_PANDOC_FILES)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# HTML INDEX
|
# HTML INDEX
|
||||||
HTML_INDEX := $(DST_DIR)/index.html
|
HTML_INDEX := $(DST_DIR)/index.html
|
||||||
MKINDEX := engine/mk-index.sh
|
MKINDEX := engine/mk-index.sh
|
||||||
|
@ -53,9 +56,21 @@ $(HTML_INDEX): $(DST_PANDOC_FILES) $(MKINDEX)
|
||||||
ALL += $(HTML_INDEX)
|
ALL += $(HTML_INDEX)
|
||||||
|
|
||||||
# RSS
|
# RSS
|
||||||
|
|
||||||
|
SRC_POSTS_DIR ?= $(SRC_DIR)/posts
|
||||||
|
SRC_POST_FILES ?= $(shell find $(SRC_POSTS_DIR) -type f -name "*$(EXT)")
|
||||||
|
RSS_CACHE_DIR ?= $(CACHE_DIR)/rss
|
||||||
|
DST_RSS_FILES ?= $(subst .$(EXT),.rss, \
|
||||||
|
$(patsubst $(SRC_POSTS_DIR)/%,$(RSS_CACHE_DIR)/%, \
|
||||||
|
$(SRC_POSTS_FILES)))
|
||||||
|
MK_RSS_ENTRY := ./engine/mk-rss-entry.sh
|
||||||
|
$(RSS_CACHE_DIR)/%.rss: $(DST_DIR)/posts/%.html $(MK_RSS_ENTRY)
|
||||||
|
@mkdir -p $(RSS_CACHE_DIR)
|
||||||
|
$(MK_RSS_ENTRY) $@
|
||||||
|
|
||||||
RSS := $(DST_DIR)/rss.xml
|
RSS := $(DST_DIR)/rss.xml
|
||||||
MKRSS := engine/mkrss.sh
|
MKRSS := engine/mkrss.sh
|
||||||
$(RSS): $(DST_PANDOC_FILES) $(MKRSS)
|
$(RSS): $(DST_RSS_FILES) $(MKRSS)
|
||||||
$(MKRSS)
|
$(MKRSS)
|
||||||
ALL += $(RSS)
|
ALL += $(RSS)
|
||||||
|
|
||||||
|
@ -119,3 +134,4 @@ fast: $(ALL)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-[ ! -z "$(DST_DIR)" ] && rm -rf $(DST_DIR)/*
|
-[ ! -z "$(DST_DIR)" ] && rm -rf $(DST_DIR)/*
|
||||||
|
-[ ! -z "$(CACHE_DIR)" ] && rm -rf $(CACHE_DIR)/*
|
||||||
|
|
72
engine/mk-rss-entry.sh
Executable file
72
engine/mk-rss-entry.sh
Executable file
|
@ -0,0 +1,72 @@
|
||||||
|
#!/usr/bin/env zsh
|
||||||
|
|
||||||
|
cd "$(git rev-parse --show-toplevel)" || exit 1
|
||||||
|
# Directory
|
||||||
|
webdir="_site"
|
||||||
|
postsdir="$webdir/posts"
|
||||||
|
indexdir=".cache/rss"
|
||||||
|
|
||||||
|
# file to handle
|
||||||
|
fic="$1"
|
||||||
|
|
||||||
|
# RSS Metas
|
||||||
|
websiteurl="https://her.esy.fun"
|
||||||
|
|
||||||
|
# HTML Accessors (similar to CSS accessors)
|
||||||
|
dateaccessor='.yyydate'
|
||||||
|
contentaccessor='#content'
|
||||||
|
# title and keyword shouldn't be changed
|
||||||
|
titleaccessor='title'
|
||||||
|
keywordsaccessor='meta[name=keywords]::attr(content)'
|
||||||
|
|
||||||
|
formatdate() {
|
||||||
|
# format the date for RSS
|
||||||
|
local d="$1"
|
||||||
|
# echo "DEBUG DATE: $d" >&2
|
||||||
|
LC_TIME=en_US date --date $d +'%a, %d %b %Y %H:%M:%S %z'
|
||||||
|
}
|
||||||
|
|
||||||
|
finddate(){ < $1 hxselect -c $dateaccessor | sed 's/\[//g;s/\]//g;s/ .*$//' }
|
||||||
|
findtitle(){ < $1 hxselect -c $titleaccessor }
|
||||||
|
getcontent(){
|
||||||
|
< $1 hxselect $contentaccessor | \
|
||||||
|
perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
|
||||||
|
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,/ /g' }
|
||||||
|
|
||||||
|
mkcategories(){
|
||||||
|
for keyword in $*; do
|
||||||
|
printf "\\n<category>%s</category>" $keyword
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
autoload -U colors && colors
|
||||||
|
|
||||||
|
if echo $fic|egrep -- '-(mk|min|sci|modern).html$'>/dev/null; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
postfile="$(echo "$fic"|sed 's#^'$postsdir'/##')"
|
||||||
|
blogfile="$(echo "$fic"|sed 's#^'$webdir'/##')"
|
||||||
|
printf "%-30s" $postfile
|
||||||
|
xfic="$indexdir/$fic.xml"
|
||||||
|
mkdir -p $(dirname $xfic)
|
||||||
|
hxclean $fic > $xfic
|
||||||
|
d=$(finddate $xfic)
|
||||||
|
echo -n " [$d]"
|
||||||
|
rssdate=$(formatdate $d)
|
||||||
|
title=$(findtitle $xfic)
|
||||||
|
keywords=( $(findkeywords $xfic) )
|
||||||
|
printf ": %-55s" "$title ($keywords)"
|
||||||
|
categories=$(mkcategories $keywords)
|
||||||
|
absoluteurl="${websiteurl}/${blogfile}"
|
||||||
|
dst="$indexdir/$fic.rss"
|
||||||
|
mkdir -p $(dirname $dst)
|
||||||
|
{ printf "\\n<item>"
|
||||||
|
printf "\\n<title>%s</title>" "$title"
|
||||||
|
printf "\\n<guid>%s</guid>" "$absoluteurl"
|
||||||
|
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
|
||||||
|
printf "%s" "$categories"
|
||||||
|
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic" "$absoluteurl")"
|
||||||
|
printf "\\n</item>\\n\\n"
|
||||||
|
} >> "$dst"
|
||||||
|
echo " [${fg[green]}OK${reset_color}]"
|
|
@ -5,6 +5,7 @@ cd "$(git rev-parse --show-toplevel)" || exit 1
|
||||||
webdir="_site"
|
webdir="_site"
|
||||||
postsdir="$webdir/posts"
|
postsdir="$webdir/posts"
|
||||||
rssfile="$webdir/rss.xml"
|
rssfile="$webdir/rss.xml"
|
||||||
|
indexdir=".cache/rss"
|
||||||
|
|
||||||
# maximal number of articles to put in the RSS file
|
# maximal number of articles to put in the RSS file
|
||||||
maxarticles=10
|
maxarticles=10
|
||||||
|
@ -19,11 +20,7 @@ rssauthor="yann@esposito.host (Yann Esposito)"
|
||||||
rssimgurl="https://her.esy.fun/img/FlatAvatar.png"
|
rssimgurl="https://her.esy.fun/img/FlatAvatar.png"
|
||||||
|
|
||||||
# HTML Accessors (similar to CSS accessors)
|
# HTML Accessors (similar to CSS accessors)
|
||||||
dateaccessor='.yyydate'
|
dateaccessor='pubDate'
|
||||||
contentaccessor='#content'
|
|
||||||
# title and keyword shouldn't be changed
|
|
||||||
titleaccessor='title'
|
|
||||||
keywordsaccessor='meta[name=keywords]::attr(content)'
|
|
||||||
|
|
||||||
formatdate() {
|
formatdate() {
|
||||||
# format the date for RSS
|
# format the date for RSS
|
||||||
|
@ -32,51 +29,27 @@ formatdate() {
|
||||||
LC_TIME=en_US date --date $d +'%a, %d %b %Y %H:%M:%S %z'
|
LC_TIME=en_US date --date $d +'%a, %d %b %Y %H:%M:%S %z'
|
||||||
}
|
}
|
||||||
|
|
||||||
finddate(){ < $1 hxselect -c $dateaccessor | sed 's/\[//g;s/\]//g;s/ .*$//' }
|
isodate() {
|
||||||
findtitle(){ < $1 hxselect -c $titleaccessor }
|
# format the date for sorting
|
||||||
getcontent(){
|
local d="$1"
|
||||||
< $1 hxselect $contentaccessor | \
|
# echo "DEBUG DATE: $d" >&2
|
||||||
perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
|
LC_TIME=en_US date --date $d +'%Y-%m-%dT%H:%M:%S'
|
||||||
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,/ /g' }
|
|
||||||
mkcategories(){
|
|
||||||
for keyword in $*; do
|
|
||||||
printf "\\n<category>%s</category>" $keyword
|
|
||||||
done
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
finddate(){ < $1 hxselect -c $dateaccessor | sed 's/\[//g;s/\]//g;s/ .*$//' }
|
||||||
|
|
||||||
autoload -U colors && colors
|
autoload -U colors && colors
|
||||||
|
|
||||||
tmpdir=$(mktemp -d)
|
|
||||||
typeset -a dates
|
typeset -a dates
|
||||||
dates=( )
|
dates=( )
|
||||||
for fic in $postsdir/**/*.html; do
|
tmpdir=$(mktemp -d)
|
||||||
if echo $fic|egrep -- '-(mk|min|sci|modern).html$'>/dev/null; then
|
for fic in $indexdir/*.rss; do
|
||||||
continue
|
rssdate=$(finddate $xfic)
|
||||||
fi
|
echo -n "${fic:t} [$d]"
|
||||||
postfile="$(echo "$fic"|sed 's#^'$postsdir'/##')"
|
d=$(isodate $rssdate)
|
||||||
blogfile="$(echo "$fic"|sed 's#^'$webdir'/##')"
|
|
||||||
printf "%-30s" $postfile
|
|
||||||
xfic="$tmpdir/$fic.xml"
|
|
||||||
mkdir -p $(dirname $xfic)
|
|
||||||
hxclean $fic > $xfic
|
|
||||||
d=$(finddate $xfic)
|
|
||||||
echo -n " [$d]"
|
|
||||||
rssdate=$(formatdate $d)
|
|
||||||
title=$(findtitle $xfic)
|
|
||||||
keywords=( $(findkeywords $xfic) )
|
|
||||||
printf ": %-55s" "$title ($keywords)"
|
|
||||||
categories=$(mkcategories $keywords)
|
|
||||||
absoluteurl="${websiteurl}/${blogfile}"
|
|
||||||
{ printf "\\n<item>"
|
|
||||||
printf "\\n<title>%s</title>" "$title"
|
|
||||||
printf "\\n<guid>%s</guid>" "$absoluteurl"
|
|
||||||
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
|
|
||||||
printf "%s" "$categories"
|
|
||||||
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic" "$absoluteurl")"
|
|
||||||
printf "\\n</item>\\n\\n"
|
|
||||||
} >> "$tmpdir/${d}-$(basename $fic).rss"
|
|
||||||
dates=( $d $dates )
|
dates=( $d $dates )
|
||||||
echo " [${fg[green]}OK${reset_color}]"
|
echo " [${fg[green]}OK${reset_color}]"
|
||||||
|
cp $fic $tmpdir/$d-${fic:t}.rss
|
||||||
done
|
done
|
||||||
echo "Publishing"
|
echo "Publishing"
|
||||||
for fic in $(ls $tmpdir/*.rss | sort -r | head -n $maxarticles ); do
|
for fic in $(ls $tmpdir/*.rss | sort -r | head -n $maxarticles ); do
|
||||||
|
|
Loading…
Reference in a new issue