From 5bbd170638ac1d0c5ffa258f5f5f73e854f5b7f4 Mon Sep 17 00:00:00 2001 From: "Yann Esposito (Yogsototh)" Date: Fri, 6 Dec 2019 15:34:36 +0100 Subject: [PATCH] enhanced optim-classes script --- build.sh | 5 +- optim-classes.sh | 38 +++++-- src/posts/0009-optim-nojs-website/index.org | 110 ++++++++++++++++++++ 3 files changed, 141 insertions(+), 12 deletions(-) create mode 100644 src/posts/0009-optim-nojs-website/index.org diff --git a/build.sh b/build.sh index dadc77a..c56722e 100755 --- a/build.sh +++ b/build.sh @@ -8,10 +8,9 @@ echo "Optim HTML size" ./optim-html.sh echo "Gen themes clones" ./dup-for-themes.sh -echo "Update file size" -./update-file-size.sh echo "Building RSS" ./mkrss.sh -echo "RSS Built" echo "Optim Classes accross CSS/HTML" ./optim-classes.sh +echo "Update file size" +./update-file-size.sh diff --git a/optim-classes.sh b/optim-classes.sh index 33357d1..29a11c8 100755 --- a/optim-classes.sh +++ b/optim-classes.sh @@ -1,6 +1,23 @@ #!/bin/zsh -classes=( $( {cat _site/**/*.html(N) | perl -p -e 's/class="?([a-zA-Z0-9_-]*)/\nCLASS: $1\n/g'; cat _site/**/*.css(N) | perl -p -e 's/\.([a-zA-Z-_][a-zA-Z0-9-_]*)/\nCLASS: $1\n/g'}|grep CLASS|sort -u|cut -d\ -f 2,2|awk 'length($1)>2 {print length($1),$1}'|sort -n|cut -d\ -f 2,2) ) +webdir="_site" + +retrieve_classes_in_html () { + cat $webdir/**/*.html(N) | \ + perl -pe 's/class="?([a-zA-Z0-9_-]*)/\nCLASS: $1\n/g' +} + +retrieve_classes_in_css () { + cat $webdir/**/*.css(N) | \ + perl -pe 's/\.([a-zA-Z-_][a-zA-Z0-9-_]*)/\nCLASS: $1\n/g' +} + +classes=( $( {retrieve_classes_in_html; retrieve_classes_in_css}| \ + egrep "^CLASS: [^ ]*$" |\ + sort -u | \ + awk 'length($2)>2 && $2 != "web-file-size" {print length($2),$2}'|\ + sort -rn | \ + awk '{print $2}') ) chr() { [ "$1" -lt 26 ] || return 1 @@ -24,16 +41,19 @@ for c in $classes; do ((i++)) done +htmlreplacer='' +cssreplacer='' +for long in $classes; do + htmlreplacer=$htmlreplacer's#class=("?)'${long}'#class=$1'${assoc[$long]}'#g;' + 
cssreplacer=$cssreplacer's#\.'${long}'#.'${assoc[$long]}'#g;' +done -for fic in _site/**/*.{html,xml}(N); do + +for fic in $webdir/**/*.{html,xml}(N); do print -- $fic - for long in $classes; do - perl -pi -e 's#class=("?)'${long}'#class=$1'${assoc[$long]}'#g' $fic - done + perl -pi -e $htmlreplacer $fic done -for fic in _site/**/*.css(N); do +for fic in $webdir/**/*.css(N); do echo $fic - for long in $classes; do - perl -pi -e 's#\.'"${long}"'#.'"${assoc[$long]}"'#g' $fic - done + perl -pi -e $cssreplacer $fic done diff --git a/src/posts/0009-optim-nojs-website/index.org b/src/posts/0009-optim-nojs-website/index.org new file mode 100644 index 0000000..59e5e7c --- /dev/null +++ b/src/posts/0009-optim-nojs-website/index.org @@ -0,0 +1,110 @@ +#+TITLE: Optimize the size of no js websites +#+AUTHOR: Yann Esposito +#+EMAIL: yann@esposito.host +#+DATE: [2019-12-06 Fri] +#+KEYWORDS: blog, shell, script +#+DESCRIPTION: How to optimize the size of a full website by using information in both HTML and CSS. +#+OPTIONS: auto-id:t toc:nil + +One of the major problems with CSS and HTML is that they are highly +dependent on each other. +For example, if you want to minimize your CSS, you are still forced to use +the same class names even if they are long, +because the HTML uses them. +And the same problem arises when you want to minimize the size of your HTML +files. + +It means that if you want to minimize a full website you must take care of +the HTML pages and the CSS files at the same time. +And this is totally impossible to achieve if JS is involved because there +is always the risk that the JS code generates class names to manipulate the DOM. + +So here is a small script I have wanted to write for a long time that does the following: + +1. retrieve all class names in the HTML and in the CSS +2. create an associative array from those long names to shorter names +3. replace the class names in the HTML and CSS files.
+ +Here is my quick and dirty script doing that: + +#+name: optim-classes.sh +#+begin_src bash +#!/bin/zsh + +webdir="_site" + +retrieve_classes_in_html () { + cat $webdir/**/*.html(N) | \ + perl -pe 's/class="?([a-zA-Z0-9_-]*)/\nCLASS: $1\n/g' +} + +retrieve_classes_in_css () { + cat $webdir/**/*.css(N) | \ + perl -pe 's/\.([a-zA-Z-_][a-zA-Z0-9-_]*)/\nCLASS: $1\n/g' +} + +classes=( $( {retrieve_classes_in_html; retrieve_classes_in_css}| \ + egrep "^CLASS: [^ ]*$" |\ + sort -u | \ + awk 'length($2)>2 {print length($2),$2}'|\ + sort -rn | \ + awk '{print $2}') ) + +chr() { + [ "$1" -lt 26 ] || return 1 + printf "\\$(printf '%03o' $(( 97 + $1 )))" +} + +shortName() { + if [ "$1" -gt 25 ]; then + print -- $(shortName $(( ( $1 / 26 ) - 1 )))$(shortName $(( $1 % 26 ))) + else + chr $1 + fi +} + +i=0; +typeset -A assoc +for c in $classes; do + sn=$(shortName $i) + print "$c -> $sn" + assoc[$c]=$sn + ((i++)) +done + +htmlreplacer='' +cssreplacer='' +for long in $classes; do + htmlreplacer=$htmlreplacer's#class=("?)'${long}'#class=$1'${assoc[$long]}'#g;' + cssreplacer=$cssreplacer's#\.'${long}'#.'${assoc[$long]}'#g;' +done + + +for fic in $webdir/**/*.{html,xml}(N); do + print -- $fic + perl -pi -e $htmlreplacer $fic +done +for fic in $webdir/**/*.css(N); do + echo $fic + perl -pi -e $cssreplacer $fic +done +#+end_src + +A few remarks: + +- To avoid doing the work twice, the script only handles class + names that are at least 3 chars long (=awk 'length($2)>2 {print + length($2),$2}'=). As a consequence, make sure your website does not + use class names shorter than 3 chars, otherwise they could collide with the generated short names and mess with your CSS. +- The script does not change ids because those can be used for anchors and + thus can be part of public URLs. +- The script replaces the classes with the longest names first to prevent + bugs if one class name is a prefix of another one. +- We generate a long perl script so that perl is launched only once per file; this makes the + full find and replace way faster.
+ +Of course this could be improved by giving the shortest names to the most +used classes, and also by using a better =shortName= function that could +use more chars. +But even this quick and dirty script already does a better job than +existing methods that do not take into account all the CSS and HTML files.