From ee59534aac4b07a54cf56fc0b2383c3b27185128 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Fri, 6 Jul 2012 23:17:37 +0200 Subject: o Trying to include more stuff. --- .gitignore | 2 ++ tv.nrk.no.xsl | 29 +++++++++++++++++++++++++++-- update-feed.sh | 39 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index f45b400..c36659b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ *.atom +tv.nrk.no.html +tmp diff --git a/tv.nrk.no.xsl b/tv.nrk.no.xsl index a825181..b4c53d5 100644 --- a/tv.nrk.no.xsl +++ b/tv.nrk.no.xsl @@ -1,6 +1,8 @@ @@ -8,16 +10,38 @@ Recently Sent from NRK urn:trygvis:2012,07,tv.nrk.no,recently-sent - + - + + + + + + + + +
+ +
+
+ + +
diff --git a/update-feed.sh b/update-feed.sh index 4afd168..122db80 100755 --- a/update-feed.sh +++ b/update-feed.sh @@ -1,7 +1,6 @@ #!/bin/bash set -e -set -x checksum() { [ -r $1 ] && md5sum $1 | cut -f 1 -d ' ' @@ -22,4 +21,40 @@ fetch() { fetch -cat tv.nrk.no.html | xmlstarlet tr ./tv.nrk.no.xsl > tv.nrk.no.atom +mkdir -p tmp + +cat tv.nrk.no.html | \ + xmlstarlet sel -N strings=http://exslt.org/strings -t -m '//a[@class="listobject-link"]' \ + -v '@href' -v '" "' \ + -v 'strings:encode-uri(normalize-space(.), true())' -n \ + | while read url name +do + if [ "$name" == "" -o -r "$name" ] + then + continue + fi + + echo Url: $url + echo Name: $name + if [ -r "tmp/$name" ] + then + z="tmp/$name" + else + z="Jan 1 1970" + fi + + curl -v -s -z "$z" -L "$url" -o "tmp/$name" + rm -f "tmp/$name.html" "tmp/$name.xml" + set +e + cat "tmp/$name" | dos2unix | \ + tidy -utf8 -asxhtml -quiet -f /dev/null \ + --new-inline-tags "time" \ + --new-blocklevel-tags "article, hgroup, section, header, footer, mark, aside" \ + > "tmp/$name.html" + echo ret=$? + set -e + cat "tmp/$name.html" | xmllint --format --xmlout - >"tmp/${name}.xml" +done + +cat tv.nrk.no.html | xmlstarlet tr ./tv.nrk.no.xsl | tee tv.nrk.no.atom +#rm -rf tmp -- cgit v1.2.3