Updated RSS generation again to use absolute paths, which made the feed fully valid

This commit is contained in:
Yann Esposito (Yogsototh) 2019-10-01 07:44:03 +02:00
parent 85de18b02b
commit 4db6bae65a
Signed by untrusted user who does not match committer: yogsototh
GPG key ID: 7B19A4C650D59646
4 changed files with 24 additions and 13 deletions

View file

@ -16,7 +16,6 @@ websiteurl="https://her.esy.fun"
rssdescription="her.esy.fun articles, mostly random personal thoughts" rssdescription="her.esy.fun articles, mostly random personal thoughts"
rsslang="en" rsslang="en"
rssauthor="yann@esposito.host (Yann Esposito)" rssauthor="yann@esposito.host (Yann Esposito)"
rssimgtitle="yogsototh"
rssimgurl="https://her.esy.fun/img/FlatAvatar.png" rssimgurl="https://her.esy.fun/img/FlatAvatar.png"
# HTML Accessors (similar to CSS accessors) # HTML Accessors (similar to CSS accessors)
@ -34,7 +33,9 @@ formatdate() {
finddate(){ < $1 hxselect -c $dateaccessor } finddate(){ < $1 hxselect -c $dateaccessor }
findtitle(){ < $1 hxselect -c $titleaccessor } findtitle(){ < $1 hxselect -c $titleaccessor }
getcontent(){ < $1 hxselect $contentaccessor } getcontent(){
< $1 hxselect $contentaccessor | \
perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' } findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
mkcategories(){ mkcategories(){
for keyword in $*; do for keyword in $*; do
@ -61,12 +62,13 @@ for fic in $postsdir/**/*.html; do
keywords=( $(findkeywords $xfic) ) keywords=( $(findkeywords $xfic) )
printf ": %-55s" "$title ($keywords)" printf ": %-55s" "$title ($keywords)"
categories=$(mkcategories $keywords) categories=$(mkcategories $keywords)
absoluteurl="${websiteurl}/${blogfile}"
{ printf "\\n<item>" { printf "\\n<item>"
printf "\\n<title>%s</title>" "$title" printf "\\n<title>%s</title>" "$title"
printf "\\n<guid>%s</guid>" "${websiteurl}/${blogfile}" printf "\\n<guid>%s</guid>" "$absoluteurl"
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate" printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
printf "%s" "$categories" printf "%s" "$categories"
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic")" printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic" "$absoluteurl")"
printf "\\n</item>\\n\\n" printf "\\n</item>\\n\\n"
} >> "$tmpdir/${d}-$(basename $fic).rss" } >> "$tmpdir/${d}-$(basename $fic).rss"
dates=( $d $dates ) dates=( $d $dates )
@ -104,7 +106,7 @@ cat <<END
<webMaster>${rssauthor}</webMaster> <webMaster>${rssauthor}</webMaster>
<image> <image>
<url>${rssimgurl}</url> <url>${rssimgurl}</url>
<title>${rssimgtitle}</title> <title>${rsstitle}</title>
<link>${websiteurl}</link> <link>${websiteurl}</link>
</image> </image>
END END

View file

@ -1,5 +1,5 @@
# { pkgs ? import <nixpkgs> {} }: # { pkgs ? import <nixpkgs> {} }:
{ pkgs ? import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }: { pkgs ? import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }:
pkgs.mkShell { pkgs.mkShell {
buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh ]; buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh pkgs.perl pkgs.perlPackages.URI ];
} }

View file

@ -90,7 +90,10 @@ formatdate() {
finddate(){ < $1 hxselect -c $dateaccessor } finddate(){ < $1 hxselect -c $dateaccessor }
findtitle(){ < $1 hxselect -c $titleaccessor } findtitle(){ < $1 hxselect -c $titleaccessor }
getcontent(){ < $1 hxselect $contentaccessor } # retrieve the content, take care of using absolute URL
getcontent(){
< $1 hxselect $contentaccessor | \
perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' } findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
mkcategories(){ mkcategories(){
for keyword in $*; do for keyword in $*; do
@ -125,12 +128,13 @@ for fic in $postsdir/**/*.html; do
printf ": %-55s" "$title ($keywords)" printf ": %-55s" "$title ($keywords)"
# up until here, we extracted the informations we need for the item # up until here, we extracted the informations we need for the item
categories=$(mkcategories $keywords) categories=$(mkcategories $keywords)
absoluteurl="${websiteurl}/${blogfile}"
{ printf "\\n<item>" { printf "\\n<item>"
printf "\\n<title>%s</title>" "$title" printf "\\n<title>%s</title>" "$title"
printf "\\n<guid>%s</guid>" "${websiteurl}/${blogfile}" printf "\\n<guid>%s</guid>" "$absoluteurl"
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate" printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
printf "%s" "$categories" printf "%s" "$categories"
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic")" printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic" "$absoluteurl")"
printf "\\n</item>\\n\\n" printf "\\n</item>\\n\\n"
} >> "$tmpdir/${d}-$(basename $fic).rss" } >> "$tmpdir/${d}-$(basename $fic).rss"
# we append the date to the list of dates # we append the date to the list of dates
@ -218,13 +222,15 @@ It takes care that =zsh=, =coreutils= and =html-xml-utils= are installed
before running my script. before running my script.
For example my script uses =date= from GNU coreutils and not the =BSD= date For example my script uses =date= from GNU coreutils and not the =BSD= date
from my OS, which makes the script more portable. from my OS, which makes the script more portable.
This also takes care of installing Perl and the URI Perl package.
Along my script I have a =shell.nix= file containing: Along my script I have a =shell.nix= file containing:
#+begin_src nix #+begin_src nix
# { pkgs ? import <nixpkgs> {} }:
{ pkgs ? import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }: { pkgs ? import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }:
pkgs.mkShell { pkgs.mkShell {
buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh ]; buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh pkgs.perl pkgs.perlPackages.URI ];
} }
#+end_src #+end_src

View file

@ -34,7 +34,9 @@ formatdate() {
finddate(){ < $1 hxselect -c $dateaccessor } finddate(){ < $1 hxselect -c $dateaccessor }
findtitle(){ < $1 hxselect -c $titleaccessor } findtitle(){ < $1 hxselect -c $titleaccessor }
getcontent(){ < $1 hxselect $contentaccessor } getcontent(){
< $1 hxselect $contentaccessor | \
perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' } findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
mkcategories(){ mkcategories(){
for keyword in $*; do for keyword in $*; do
@ -61,12 +63,13 @@ for fic in $postsdir/**/*.html; do
keywords=( $(findkeywords $xfic) ) keywords=( $(findkeywords $xfic) )
printf ": %-55s" "$title ($keywords)" printf ": %-55s" "$title ($keywords)"
categories=$(mkcategories $keywords) categories=$(mkcategories $keywords)
absoluteurl="${websiteurl}/${blogfile}"
{ printf "\\n<item>" { printf "\\n<item>"
printf "\\n<title>%s</title>" "$title" printf "\\n<title>%s</title>" "$title"
printf "\\n<guid>%s</guid>" "${websiteurl}/${blogfile}" printf "\\n<guid>%s</guid>" "$absoluteurl"
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate" printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
printf "%s" "$categories" printf "%s" "$categories"
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic")" printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic" "$absoluteurl")"
printf "\\n</item>\\n\\n" printf "\\n</item>\\n\\n"
} >> "$tmpdir/${d}-$(basename $fic).rss" } >> "$tmpdir/${d}-$(basename $fic).rss"
dates=( $d $dates ) dates=( $d $dates )