diff --git a/mkrss.sh b/mkrss.sh index 4ac55c0..8507107 100755 --- a/mkrss.sh +++ b/mkrss.sh @@ -16,7 +16,6 @@ websiteurl="https://her.esy.fun" rssdescription="her.esy.fun articles, mostly random personal thoughts" rsslang="en" rssauthor="yann@esposito.host (Yann Esposito)" -rssimgtitle="yogsototh" rssimgurl="https://her.esy.fun/img/FlatAvatar.png" # HTML Accessors (similar to CSS accessors) @@ -34,7 +33,9 @@ formatdate() { finddate(){ < $1 hxselect -c $dateaccessor } findtitle(){ < $1 hxselect -c $titleaccessor } -getcontent(){ < $1 hxselect $contentaccessor } +getcontent(){ + < $1 hxselect $contentaccessor | \ + perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' } findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' } mkcategories(){ for keyword in $*; do @@ -61,12 +62,13 @@ for fic in $postsdir/**/*.html; do keywords=( $(findkeywords $xfic) ) printf ": %-55s" "$title ($keywords)" categories=$(mkcategories $keywords) + absoluteurl="${websiteurl}/${blogfile}" { printf "\\n" printf "\\n%s" "$title" - printf "\\n%s" "${websiteurl}/${blogfile}" + printf "\\n%s" "$absoluteurl" printf "\\n%s%s" "$rssdate" printf "%s" "$categories" - printf "\\n" "$(getcontent "$xfic")" + printf "\\n" "$(getcontent "$xfic" "$absoluteurl")" printf "\\n\\n\\n" } >> "$tmpdir/${d}-$(basename $fic).rss" dates=( $d $dates ) @@ -104,7 +106,7 @@ cat <${rssauthor} ${rssimgurl} - ${rssimgtitle} + ${rsstitle} ${websiteurl} END diff --git a/shell.nix b/shell.nix index 67e388e..6f6e1c2 100644 --- a/shell.nix +++ b/shell.nix @@ -1,5 +1,5 @@ # { pkgs ? import {} }: { pkgs ? 
import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }: pkgs.mkShell { - buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh ]; + buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh pkgs.perl pkgs.perlPackages.URI ]; } diff --git a/src/posts/rss-gen.org b/src/posts/rss-gen.org index 60b2742..cc80cf7 100644 --- a/src/posts/rss-gen.org +++ b/src/posts/rss-gen.org @@ -90,7 +90,10 @@ formatdate() { finddate(){ < $1 hxselect -c $dateaccessor } findtitle(){ < $1 hxselect -c $titleaccessor } -getcontent(){ < $1 hxselect $contentaccessor } +# retrieve the content, take care of using absolute URL +getcontent(){ + < $1 hxselect $contentaccessor | \ + perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' } findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' } mkcategories(){ for keyword in $*; do @@ -125,12 +128,13 @@ for fic in $postsdir/**/*.html; do printf ": %-55s" "$title ($keywords)" # up until here, we extracted the informations we need for the item categories=$(mkcategories $keywords) + absoluteurl="${websiteurl}/${blogfile}" { printf "\\n" printf "\\n%s" "$title" - printf "\\n%s" "${websiteurl}/${blogfile}" + printf "\\n%s" "$absoluteurl" printf "\\n%s%s" "$rssdate" printf "%s" "$categories" - printf "\\n" "$(getcontent "$xfic")" + printf "\\n" "$(getcontent "$xfic" "$absoluteurl")" printf "\\n\\n\\n" } >> "$tmpdir/${d}-$(basename $fic).rss" # we append the date to the list of dates @@ -218,13 +222,15 @@ It takes care that =zsh=, =coreutils= and =html-xml-utils= are installed before running my script. For example my script uses =date= from GNU coreutils and not the =BSD= date from my OS, which makes the script more portable. +This also takes care of using the URI perl package. Along my script I have a =shell.nix= file containing: #+begin_src nix +# { pkgs ? import {} }: { pkgs ? 
import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }: pkgs.mkShell { - buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh ]; + buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh pkgs.perl pkgs.perlPackages.URI ]; } #+end_src diff --git a/src/posts/rss-gen/mkrss.sh b/src/posts/rss-gen/mkrss.sh index 4ac55c0..eafa2be 100755 --- a/src/posts/rss-gen/mkrss.sh +++ b/src/posts/rss-gen/mkrss.sh @@ -34,7 +34,9 @@ formatdate() { finddate(){ < $1 hxselect -c $dateaccessor } findtitle(){ < $1 hxselect -c $titleaccessor } -getcontent(){ < $1 hxselect $contentaccessor } +getcontent(){ + < $1 hxselect $contentaccessor | \ + perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' } findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' } mkcategories(){ for keyword in $*; do @@ -61,12 +63,13 @@ for fic in $postsdir/**/*.html; do keywords=( $(findkeywords $xfic) ) printf ": %-55s" "$title ($keywords)" categories=$(mkcategories $keywords) + absoluteurl="${websiteurl}/${blogfile}" { printf "\\n" printf "\\n%s" "$title" - printf "\\n%s" "${websiteurl}/${blogfile}" + printf "\\n%s" "$absoluteurl" printf "\\n%s%s" "$rssdate" printf "%s" "$categories" - printf "\\n" "$(getcontent "$xfic")" + printf "\\n" "$(getcontent "$xfic" "$absoluteurl")" printf "\\n\\n\\n" } >> "$tmpdir/${d}-$(basename $fic).rss" dates=( $d $dates )