diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | tv.nrk.no.xsl | 29 | ||||
-rwxr-xr-x | update-feed.sh | 39 |
3 files changed, 66 insertions, 4 deletions
@@ -1 +1,3 @@ *.atom +tv.nrk.no.html +tmp diff --git a/tv.nrk.no.xsl b/tv.nrk.no.xsl index a825181..b4c53d5 100644 --- a/tv.nrk.no.xsl +++ b/tv.nrk.no.xsl @@ -1,6 +1,8 @@ <?xml version="1.0"?> <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:strings="http://exslt.org/strings" + xmlns:html="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/2005/Atom"> <xsl:output indent="yes"/> <xsl:template match="/html"> @@ -8,16 +10,38 @@ <title>Recently Sent from NRK</title> <id>urn:trygvis:2012,07,tv.nrk.no,recently-sent</id> <xsl:for-each select="//li"> -<xsl:call-template name="li"/> + <xsl:call-template name="li"/> </xsl:for-each> </feed> </xsl:template> <xsl:template name="li"> <entry> - <id><xsl:value-of select="a/@href"/></id> + <xsl:variable name='href' select='a/@href'/> + <xsl:variable name='filename' select='concat("tmp/", strings:encode-uri(normalize-space(a), true()), ".xml")'/> + <xsl:variable name='show' select='document($filename)'/> + <!-- + <xsl:variable name='href' select='"heli-hogst.html"'/> + --> + <id><xsl:value-of select="$href"/></id> <!-- <id>urn:trygvis:2012,07,tv.nrk.no,item,<xsl:value-of select='normalize-space(a)'/></id> --> + <!-- + /html/body/div/div/div/div/section/article/div/div/section[@id='information'] + --> + <!-- + href: <xsl:value-of select="$href"/> + filename: <xsl:value-of select="$filename"/> + --> + <content type="xhtml" xml:lang="en"> + <div xmlns="http://www.w3.org/1999/xhtml"> + <xsl:copy-of select="$show/html:html//html:section[@id='information']/*"/> + </div> + </content> + <!-- + --> + + <!-- <title><xsl:value-of select='normalize-space(a)'/></title> <updated><xsl:value-of select="a/strong/time/@datetime"/></updated> <author><name>Trygve Laugstøl</name></author> @@ -27,6 +51,7 @@ <summary> <xsl:value-of select="normalize-space(div[@class='stack-links'])"/> </summary> + --> </entry> </xsl:template> </xsl:stylesheet> diff --git a/update-feed.sh b/update-feed.sh index 4afd168..122db80 100755 --- a/update-feed.sh +++ b/update-feed.sh @@ -1,7 +1,6 @@ #!/bin/bash set -e -set -x checksum() { [ -r $1 ] && md5sum $1 | cut -f 1 -d ' ' @@ -22,4 +21,40 @@ fetch() { fetch -cat tv.nrk.no.html | xmlstarlet tr ./tv.nrk.no.xsl > tv.nrk.no.atom +mkdir -p tmp + +cat tv.nrk.no.html | \ + xmlstarlet sel -N strings=http://exslt.org/strings -t -m '//a[@class="listobject-link"]' \ + -v '@href' -v '" "' \ + -v 'strings:encode-uri(normalize-space(.), true())' -n \ + | while read url name +do + if [ "$name" == "" -o -r "$name" ] + then + continue + fi + + echo Url: $url + echo Name: $name + if [ -r "tmp/$name" ] + then + z="tmp/$name" + else + z="Jan 1 1970" + fi + + curl -v -s -z "$z" -L "$url" -o "tmp/$name" + rm -f "tmp/$name.html" "tmp/$name.xml" + set +e + cat "tmp/$name" | dos2unix | \ + tidy -utf8 -asxhtml -quiet -f /dev/null \ + --new-inline-tags "time" \ + --new-blocklevel-tags "article, hgroup, section, header, footer, mark, aside" \ + > "tmp/$name.html" + echo ret=$? + set -e + cat "tmp/$name.html" | xmllint --format --xmlout - >"tmp/${name}.xml" +done + +cat tv.nrk.no.html | xmlstarlet tr ./tv.nrk.no.xsl | tee tv.nrk.no.atom +#rm -rf tmp |