#!/bin/bash
#
# Downloads the "recently sent" listing from tv.nrk.no, mirrors every
# programme page linked from it into ./tmp (raw page, tidied XHTML and
# pretty-printed XML), and finally transforms the listing into an Atom
# feed using ./tv.nrk.no.xsl.
#
# Files read/written in the current directory:
#   tv.nrk.no.html      cached, normalised copy of the listing
#   tv.nrk.no.html.new  scratch file used while refreshing the listing
#   tv.nrk.no.xsl       XSLT stylesheet (input, must already exist)
#   tv.nrk.no.atom      generated feed
#   tmp/                per-programme downloads
#
# External tools used: curl, dos2unix, xmllint, xmlstarlet, tidy, md5sum.
#
# NOTE(review): 'pipefail' is deliberately not enabled; the original script
# relied on only the last stage of each pipeline deciding success. Confirm
# before tightening this.
set -e

readonly LISTING_URL='http://tv.nrk.no/listobjects/recentlysent'
readonly LISTING_FILE='tv.nrk.no.html'
readonly STYLESHEET='./tv.nrk.no.xsl'
readonly FEED_FILE='tv.nrk.no.atom'
readonly WORK_DIR='tmp'

#######################################
# Print the MD5 digest of a file.
# Arguments: $1 - path of the file to hash
# Outputs:   the hex digest on stdout
# Returns:   non-zero (printing nothing) when the file is not readable
#######################################
checksum() {
  # Quoting matters: with an empty argument the unquoted form degenerated
  # to '[ -r ]' (true) and md5sum then blocked reading stdin.
  [[ -r "$1" ]] || return 1
  # Hash via stdin so the file name never influences md5sum's output.
  md5sum < "$1" | cut -f 1 -d ' '
}

#######################################
# Refresh the cached listing.
# Downloads the listing, normalises line endings and re-serialises the
# HTML as XML. If the result is identical to the cached copy the script
# exits with status 0 ("Already up to date."); otherwise the cached copy
# is replaced.
# Globals:   LISTING_URL, LISTING_FILE (read)
# Outputs:   status message on stdout, errors on stderr
#######################################
fetch() {
  local new_file="${LISTING_FILE}.new"
  local -a stage_status

  curl -s "$LISTING_URL" \
    | dos2unix \
    | xmllint --html --dropdtd --xmlout - 2> /dev/null > "$new_file"
  # Must be captured immediately: any further command overwrites PIPESTATUS.
  stage_status=("${PIPESTATUS[@]}")

  # Without pipefail a failed download is otherwise invisible and a broken
  # result could replace the good cached listing.
  if (( stage_status[0] != 0 )); then
    rm -f -- "$new_file"
    printf 'Error: failed to download %s (curl exit status %d)\n' \
      "$LISTING_URL" "${stage_status[0]}" >&2
    exit 1
  fi

  if [[ -r "$LISTING_FILE" \
        && "$(checksum "$new_file")" == "$(checksum "$LISTING_FILE")" ]]; then
    rm -f -- "$new_file"
    echo "Already up to date."
    exit 0
  fi

  mv -- "$new_file" "$LISTING_FILE"
}

#######################################
# Download one programme page and derive its XHTML and XML variants.
# Globals:   WORK_DIR (read)
# Arguments: $1 - URL of the page (href taken from the listing)
#            $2 - URI-encoded link text, used as the local file name
# Outputs:   progress lines on stdout; curl's verbose log on stderr
# Returns:   0 when the entry is skipped or fully processed; a failing
#            curl or xmllint aborts the script through 'set -e'
#######################################
mirror_entry() {
  local url=$1
  local name=$2
  local page="${WORK_DIR}/${name}"
  local since
  local tidy_status=0

  # Skip entries without a name, and entries for which a readable file of
  # that name exists in the current directory.
  if [[ -z "$name" || -r "$name" ]]; then
    return 0
  fi

  printf 'Url: %s\n' "$url"
  printf 'Name: %s\n' "$name"

  # curl -z accepts either a date expression or the name of a file whose
  # modification time is used; the latter makes the request conditional on
  # the copy downloaded earlier.
  if [[ -r "$page" ]]; then
    since=$page
  else
    since='Jan 1 1970'
  fi

  # --url keeps an href that starts with '-' from being parsed as an option;
  # the value comes from a remote page and is untrusted.
  curl -v -s -z "$since" -L --url "$url" -o "$page"
  rm -f -- "${page}.html" "${page}.xml"

  # tidy's exit status is reported but never fatal (the original switched
  # errexit off around this step), hence the explicit capture.
  dos2unix < "$page" \
    | tidy -utf8 -asxhtml -quiet -f /dev/null \
        --new-inline-tags "time" \
        --new-blocklevel-tags "article, hgroup, section, header, footer, mark, aside" \
        > "${page}.html" \
    || tidy_status=$?
  echo "ret=${tidy_status}"

  xmllint --format --xmlout - < "${page}.html" > "${page}.xml"
}

#######################################
# Entry point: refresh the listing, mirror every linked page, build the feed.
# Globals:   LISTING_FILE, WORK_DIR, STYLESHEET, FEED_FILE (read)
#######################################
main() {
  local url name

  fetch
  mkdir -p -- "$WORK_DIR"

  # Emit one "<href> <uri-encoded link text>" line per listing link.
  xmlstarlet sel -N strings=http://exslt.org/strings -t \
      -m '//a[@class="listobject-link"]' \
      -v '@href' -v '" "' \
      -v 'strings:encode-uri(normalize-space(.), true())' -n \
      < "$LISTING_FILE" \
    | while read -r url name; do
        mirror_entry "$url" "$name"
      done

  xmlstarlet tr "$STYLESHEET" < "$LISTING_FILE" > "$FEED_FILE"
}

main "$@"