updated RSS gen again with absolute paths; made it fully valid
This commit is contained in:
parent
85de18b02b
commit
4db6bae65a
12
mkrss.sh
12
mkrss.sh
|
@ -16,7 +16,6 @@ websiteurl="https://her.esy.fun"
|
||||||
rssdescription="her.esy.fun articles, mostly random personal thoughts"
|
rssdescription="her.esy.fun articles, mostly random personal thoughts"
|
||||||
rsslang="en"
|
rsslang="en"
|
||||||
rssauthor="yann@esposito.host (Yann Esposito)"
|
rssauthor="yann@esposito.host (Yann Esposito)"
|
||||||
rssimgtitle="yogsototh"
|
|
||||||
rssimgurl="https://her.esy.fun/img/FlatAvatar.png"
|
rssimgurl="https://her.esy.fun/img/FlatAvatar.png"
|
||||||
|
|
||||||
# HTML Accessors (similar to CSS accessors)
|
# HTML Accessors (similar to CSS accessors)
|
||||||
|
@ -34,7 +33,9 @@ formatdate() {
|
||||||
|
|
||||||
finddate(){ < $1 hxselect -c $dateaccessor }
|
finddate(){ < $1 hxselect -c $dateaccessor }
|
||||||
findtitle(){ < $1 hxselect -c $titleaccessor }
|
findtitle(){ < $1 hxselect -c $titleaccessor }
|
||||||
getcontent(){ < $1 hxselect $contentaccessor }
|
getcontent(){
|
||||||
|
< $1 hxselect $contentaccessor | \
|
||||||
|
perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
|
||||||
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
|
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
|
||||||
mkcategories(){
|
mkcategories(){
|
||||||
for keyword in $*; do
|
for keyword in $*; do
|
||||||
|
@ -61,12 +62,13 @@ for fic in $postsdir/**/*.html; do
|
||||||
keywords=( $(findkeywords $xfic) )
|
keywords=( $(findkeywords $xfic) )
|
||||||
printf ": %-55s" "$title ($keywords)"
|
printf ": %-55s" "$title ($keywords)"
|
||||||
categories=$(mkcategories $keywords)
|
categories=$(mkcategories $keywords)
|
||||||
|
absoluteurl="${websiteurl}/${blogfile}"
|
||||||
{ printf "\\n<item>"
|
{ printf "\\n<item>"
|
||||||
printf "\\n<title>%s</title>" "$title"
|
printf "\\n<title>%s</title>" "$title"
|
||||||
printf "\\n<guid>%s</guid>" "${websiteurl}/${blogfile}"
|
printf "\\n<guid>%s</guid>" "$absoluteurl"
|
||||||
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
|
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
|
||||||
printf "%s" "$categories"
|
printf "%s" "$categories"
|
||||||
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic")"
|
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic" "$absoluteurl")"
|
||||||
printf "\\n</item>\\n\\n"
|
printf "\\n</item>\\n\\n"
|
||||||
} >> "$tmpdir/${d}-$(basename $fic).rss"
|
} >> "$tmpdir/${d}-$(basename $fic).rss"
|
||||||
dates=( $d $dates )
|
dates=( $d $dates )
|
||||||
|
@ -104,7 +106,7 @@ cat <<END
|
||||||
<webMaster>${rssauthor}</webMaster>
|
<webMaster>${rssauthor}</webMaster>
|
||||||
<image>
|
<image>
|
||||||
<url>${rssimgurl}</url>
|
<url>${rssimgurl}</url>
|
||||||
<title>${rssimgtitle}</title>
|
<title>${rsstitle}</title>
|
||||||
<link>${websiteurl}</link>
|
<link>${websiteurl}</link>
|
||||||
</image>
|
</image>
|
||||||
END
|
END
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# { pkgs ? import <nixpkgs> {} }:
|
# { pkgs ? import <nixpkgs> {} }:
|
||||||
{ pkgs ? import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }:
|
{ pkgs ? import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }:
|
||||||
pkgs.mkShell {
|
pkgs.mkShell {
|
||||||
buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh ];
|
buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh pkgs.perl pkgs.perlPackages.URI ];
|
||||||
}
|
}
|
||||||
|
|
|
@ -90,7 +90,10 @@ formatdate() {
|
||||||
|
|
||||||
finddate(){ < $1 hxselect -c $dateaccessor }
|
finddate(){ < $1 hxselect -c $dateaccessor }
|
||||||
findtitle(){ < $1 hxselect -c $titleaccessor }
|
findtitle(){ < $1 hxselect -c $titleaccessor }
|
||||||
getcontent(){ < $1 hxselect $contentaccessor }
|
# retrieve the content, take care of using absolute URL
|
||||||
|
getcontent(){
|
||||||
|
< $1 hxselect $contentaccessor | \
|
||||||
|
perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
|
||||||
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
|
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
|
||||||
mkcategories(){
|
mkcategories(){
|
||||||
for keyword in $*; do
|
for keyword in $*; do
|
||||||
|
@ -125,12 +128,13 @@ for fic in $postsdir/**/*.html; do
|
||||||
printf ": %-55s" "$title ($keywords)"
|
printf ": %-55s" "$title ($keywords)"
|
||||||
# up until here, we extracted the informations we need for the item
|
# up until here, we extracted the informations we need for the item
|
||||||
categories=$(mkcategories $keywords)
|
categories=$(mkcategories $keywords)
|
||||||
|
absoluteurl="${websiteurl}/${blogfile}"
|
||||||
{ printf "\\n<item>"
|
{ printf "\\n<item>"
|
||||||
printf "\\n<title>%s</title>" "$title"
|
printf "\\n<title>%s</title>" "$title"
|
||||||
printf "\\n<guid>%s</guid>" "${websiteurl}/${blogfile}"
|
printf "\\n<guid>%s</guid>" "$absoluteurl"
|
||||||
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
|
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
|
||||||
printf "%s" "$categories"
|
printf "%s" "$categories"
|
||||||
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic")"
|
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic" "$absoluteurl")"
|
||||||
printf "\\n</item>\\n\\n"
|
printf "\\n</item>\\n\\n"
|
||||||
} >> "$tmpdir/${d}-$(basename $fic).rss"
|
} >> "$tmpdir/${d}-$(basename $fic).rss"
|
||||||
# we append the date to the list of dates
|
# we append the date to the list of dates
|
||||||
|
@ -218,13 +222,15 @@ It takes care that =zsh=, =coreutils= and =html-xml-utils= are installed
|
||||||
before running my script.
|
before running my script.
|
||||||
For example my script uses =date= from GNU coreutils and not the =BSD= date
|
For example my script uses =date= from GNU coreutils and not the =BSD= date
|
||||||
from my OS, which makes the script more portable.
|
from my OS, which makes the script more portable.
|
||||||
|
This also takes care of providing the URI perl package.
|
||||||
|
|
||||||
Along my script I have a =shell.nix= file containing:
|
Along my script I have a =shell.nix= file containing:
|
||||||
|
|
||||||
#+begin_src nix
|
#+begin_src nix
|
||||||
|
# { pkgs ? import <nixpkgs> {} }:
|
||||||
{ pkgs ? import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }:
|
{ pkgs ? import (fetchTarball https://github.com/NixOS/nixpkgs/archive/19.09-beta.tar.gz) {} }:
|
||||||
pkgs.mkShell {
|
pkgs.mkShell {
|
||||||
buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh ];
|
buildInputs = [ pkgs.coreutils pkgs.html-xml-utils pkgs.zsh pkgs.perl pkgs.perlPackages.URI ];
|
||||||
}
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,9 @@ formatdate() {
|
||||||
|
|
||||||
finddate(){ < $1 hxselect -c $dateaccessor }
|
finddate(){ < $1 hxselect -c $dateaccessor }
|
||||||
findtitle(){ < $1 hxselect -c $titleaccessor }
|
findtitle(){ < $1 hxselect -c $titleaccessor }
|
||||||
getcontent(){ < $1 hxselect $contentaccessor }
|
getcontent(){
|
||||||
|
< $1 hxselect $contentaccessor | \
|
||||||
|
perl -pe 'use URI; $base="'$2'"; s# (href|src)="((?!https?://)[^"]*)"#" ".$1."=\"".URI->new_abs($2,$base)->as_string."\""#eig' }
|
||||||
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
|
findkeywords(){ < $1 hxselect -c $keywordsaccessor | sed 's/,//g' }
|
||||||
mkcategories(){
|
mkcategories(){
|
||||||
for keyword in $*; do
|
for keyword in $*; do
|
||||||
|
@ -61,12 +63,13 @@ for fic in $postsdir/**/*.html; do
|
||||||
keywords=( $(findkeywords $xfic) )
|
keywords=( $(findkeywords $xfic) )
|
||||||
printf ": %-55s" "$title ($keywords)"
|
printf ": %-55s" "$title ($keywords)"
|
||||||
categories=$(mkcategories $keywords)
|
categories=$(mkcategories $keywords)
|
||||||
|
absoluteurl="${websiteurl}/${blogfile}"
|
||||||
{ printf "\\n<item>"
|
{ printf "\\n<item>"
|
||||||
printf "\\n<title>%s</title>" "$title"
|
printf "\\n<title>%s</title>" "$title"
|
||||||
printf "\\n<guid>%s</guid>" "${websiteurl}/${blogfile}"
|
printf "\\n<guid>%s</guid>" "$absoluteurl"
|
||||||
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
|
printf "\\n<pubDate>%s</pubDate>%s" "$rssdate"
|
||||||
printf "%s" "$categories"
|
printf "%s" "$categories"
|
||||||
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic")"
|
printf "\\n<description><![CDATA[\\n%s\\n]]></description>" "$(getcontent "$xfic" "$absoluteurl")"
|
||||||
printf "\\n</item>\\n\\n"
|
printf "\\n</item>\\n\\n"
|
||||||
} >> "$tmpdir/${d}-$(basename $fic).rss"
|
} >> "$tmpdir/${d}-$(basename $fic).rss"
|
||||||
dates=( $d $dates )
|
dates=( $d $dates )
|
||||||
|
|
Loading…
Reference in a new issue