#!/usr/bin/env zsh
# her.esy.fun/engine/mkrss.sh
#
# Build the RSS feed of the website from the generated HTML posts.
#
# Reads the HTML files found under "$webdir/posts", extracts date, title,
# keywords and content from each of them, and writes "$webdir/rss.xml".
#
# External tools used by this script: git, hxclean and hxselect, perl with
# the URI module, and a `date` that supports `--date`.

# Always work from the root of the repository. The lookup is checked on its
# own because `cd ""` can succeed and would hide a failing `git rev-parse`.
toplevel="$(git rev-parse --show-toplevel)" || exit 1
cd "$toplevel" || exit 1
# Directories and output file (relative to the repository root)
readonly webdir="_optim"
readonly postsdir="$webdir/posts"
readonly rssfile="$webdir/rss.xml"

# Maximal number of articles to put in the RSS file
readonly maxarticles=10

# RSS metadata written in the <channel> header
readonly rsstitle="her.esy.fun"
readonly rssurl="https://her.esy.fun/rss.xml"
readonly websiteurl="https://her.esy.fun"
readonly rssdescription="her.esy.fun articles, mostly random personal thoughts"
readonly rsslang="en"
readonly rssauthor="yann@esposito.host (Yann Esposito)"
readonly rssimgurl="https://her.esy.fun/img/FlatAvatar.png"

# HTML accessors (CSS-like selectors handed to hxselect)
readonly dateaccessor='.yyydate'
readonly contentaccessor='#content'
# The title and keywords accessors shouldn't be changed
readonly titleaccessor='title'
readonly keywordsaccessor='meta[name=keywords]::attr(content)'
# Format a date the way RSS expects it, e.g. "Thu, 02 Jan 2020 00:00:00 +0000".
# Arguments: $1 - a date string understood by `date --date`
# Outputs:   the formatted date on stdout
# Returns:   non-zero when the date is empty or cannot be parsed by `date`
formatdate() {
  local d="$1"
  # echo "DEBUG DATE: $d" >&2
  if [[ -z "$d" ]]; then
    # Without this guard an empty value would vanish from the command line
    # and the format string would be consumed as the --date argument.
    echo "formatdate: empty date" >&2
    return 1
  fi
  # LC_ALL rather than LC_TIME: an LC_ALL inherited from the environment
  # takes precedence over LC_TIME and would localise day and month names.
  LC_ALL=C date --date="$d" +'%a, %d %b %Y %H:%M:%S %z'
}
# Print the date of a post.
# Selects the content matching $dateaccessor, removes every '[' and ']', and
# drops everything from the first space to the end of each line.
# Arguments: $1 - path to the cleaned HTML file of the post
finddate() {
  < "$1" hxselect -c "$dateaccessor" \
    | sed 's/\[//g;s/\]//g;s/ .*$//'
}
# Print the content selected by $titleaccessor in the given file.
# Arguments: $1 - path to the cleaned HTML file of the post
findtitle() {
  hxselect -c "$titleaccessor" < "$1"
}
# Print the part of a post selected by $contentaccessor, with every href/src
# attribute that does not start with http:// or https:// resolved against a
# base URL.
# Arguments: $1 - path to the cleaned HTML file of the post
#            $2 - base URL used to resolve the links
getcontent() {
  # The base URL is handed to perl through the environment rather than being
  # pasted into the perl program, where characters such as '@', '$' or '"'
  # would be interpreted as perl code.
  < "$1" hxselect "$contentaccessor" \
    | MKRSS_BASE="$2" perl -pe 'use URI; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$ENV{MKRSS_BASE})->as_string."\""#eig'
}
# Print the keywords of a post separated by spaces.
# The value selected by $keywordsaccessor has its commas replaced by spaces.
# Arguments: $1 - path to the cleaned HTML file of the post
findkeywords() {
  < "$1" hxselect -c "$keywordsaccessor" \
    | sed 's/,/ /g'
}
# Print one <category> element per keyword, each preceded by a newline.
# Arguments: $@ - the keywords
# Outputs:   the <category> elements on stdout; nothing when called without
#            any keyword
mkcategories() {
  # printf would still print the format once if it had no argument at all.
  (( $# > 0 )) || return 0
  # printf repeats its format for every argument, so no loop (and no loop
  # variable leaking into the global scope) is needed.
  printf '\n<category>%s</category>' "$@"
}
# Load the color tables (fg, reset_color) used for the status messages.
autoload -U colors && colors

# Scratch directory holding the cleaned HTML and one .rss fragment per post.
tmpdir=$(mktemp -d) || exit 1
# Remove the scratch directory on every exit path, not only on success.
trap 'rm -rf -- "$tmpdir"' EXIT

typeset -a dates
dates=( )

# Expand the list of posts with null_glob switched on only for this
# expansion: without it zsh reports "no matches found" when there is no post,
# and leaving it on would silently change later globs.
setopt null_glob
posts=( $postsdir/**/*.html )
unsetopt null_glob

for fic in "${posts[@]}"; do
  # Skip the files whose name ends with -mk, -min, -sci or -modern.
  case "$fic" in
    *-mk.html|*-min.html|*-sci.html|*-modern.html) continue ;;
  esac

  postfile="${fic#$postsdir/}"   # path relative to the posts directory
  blogfile="${fic#$webdir/}"     # path relative to the website root
  printf "%-30s" "$postfile"

  # hxclean output is what the hxselect-based helpers read.
  xfic="$tmpdir/$fic.xml"
  mkdir -p -- "$(dirname -- "$xfic")"
  hxclean "$fic" > "$xfic"

  d=$(finddate "$xfic")
  echo -n " [$d]"
  # A post without a usable date cannot get a valid <pubDate>; leave it out
  # of the feed instead of publishing an invalid item.
  if [[ -z "$d" ]] || ! rssdate=$(formatdate "$d"); then
    echo " [${fg[yellow]}SKIPPED: no valid date${reset_color}]"
    continue
  fi

  title=$(findtitle "$xfic")
  keywords=( $(findkeywords "$xfic") )
  printf ": %-55s" "$title (${keywords[*]})"
  categories=$(mkcategories "${keywords[@]}")
  absoluteurl="${websiteurl}/${blogfile}"
  # "]]>" inside the content would close the CDATA section early, so it is
  # split across two CDATA sections.
  content=$(getcontent "$xfic" "$absoluteurl" | sed 's/]]>/]]]]><![CDATA[>/g')

  # The fragment is named after the date (used later to sort the posts) and
  # the whole post path, so that two posts sharing a date and a basename
  # (e.g. index.html) do not end up in the same file.
  {
    printf '\n<item>'
    printf '\n<title>%s</title>' "$title"
    printf '\n<guid>%s</guid>' "$absoluteurl"
    printf '\n<pubDate>%s</pubDate>' "$rssdate"
    printf '%s' "$categories"
    printf '\n<description><![CDATA[\n%s\n]]></description>' "$content"
    printf '\n</item>\n\n'
  } >> "$tmpdir/${d}-${postfile//\//_}.rss"

  dates+=( "$d" )
  echo " [${fg[green]}OK${reset_color}]"
done
echo "Publishing"
# Start from an empty aggregate so that it exists even when there is no
# fragment to publish.
: > "$tmpdir/rss"
# Fragment names start with the post date: a reverse sort puts the newest
# first, and head keeps at most $maxarticles of them. find is used instead of
# parsing `ls`, which also avoids a glob error when there is no fragment.
find "$tmpdir" -maxdepth 1 -type f -name '*.rss' \
  | LC_ALL=C sort -r \
  | head -n "$maxarticles" \
  | while IFS= read -r fic; do
      echo "${fic##*/}"
      cat -- "$fic" >> "$tmpdir/rss"
    done
# <pubDate> of the channel: the most recent date collected in $dates, or the
# current time when no date was collected.
newestdate=$(printf '%s\n' "${dates[@]}" | LC_ALL=C sort -r | head -n 1)
rssmaxdate=$(formatdate "${newestdate:-now}")
# "now" is passed as a single word: an unquoted $(date) is split into words
# by zsh, and formatdate would only receive the name of the day.
rssbuilddate=$(formatdate now)

# Write the feed: channel header, the selected items, then the footer.
{
cat <<END
<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
xmlns:georss="http://www.georss.org/georss"
xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
xmlns:media="http://search.yahoo.com/mrss/"><channel>
<title>${rsstitle}</title>
<atom:link href="${rssurl}" rel="self" type="application/rss+xml" />
<link>${websiteurl}</link>
<description><![CDATA[${rssdescription}]]></description>
<language>${rsslang}</language>
<pubDate>${rssmaxdate}</pubDate>
<lastBuildDate>$rssbuilddate</lastBuildDate>
<generator>mkrss.sh</generator>
<webMaster>${rssauthor}</webMaster>
<image>
<url>${rssimgurl}</url>
<title>${rsstitle}</title>
<link>${websiteurl}</link>
</image>
END
# The aggregate is missing when no item was published; the feed is then
# written without any item.
if [[ -f "$tmpdir/rss" ]]; then
  cat -- "$tmpdir/rss"
fi
cat <<END
</channel>
</rss>
END
} > "$rssfile"
# HACK TO UPDATE OLD RSS FEEDS
# The generated feed is copied over the old feed locations.
legacyenrss="$webdir/Scratch/en/blog/feed/feed.xml"
legacyfrrss="$webdir/Scratch/fr/blog/feed/feed.xml"
for legacyrss in "$legacyenrss" "$legacyfrrss"; do
  # cp fails when the destination directory does not exist yet.
  mkdir -p -- "$(dirname -- "$legacyrss")" \
    && cp -f -- "$rssfile" "$legacyrss"
done

# Remove the scratch directory; the guard keeps rm from being called with an
# empty path.
if [[ -n "$tmpdir" ]]; then
  rm -rf -- "$tmpdir"
fi
echo "* RSS [done]"