summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  .gitignore      2
-rw-r--r--  tv.nrk.no.xsl  29
-rwxr-xr-x  update-feed.sh 39
3 files changed, 66 insertions, 4 deletions
diff --git a/.gitignore b/.gitignore
index f45b400..c36659b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
*.atom
+tv.nrk.no.html
+tmp
diff --git a/tv.nrk.no.xsl b/tv.nrk.no.xsl
index a825181..b4c53d5 100644
--- a/tv.nrk.no.xsl
+++ b/tv.nrk.no.xsl
@@ -1,6 +1,8 @@
<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:strings="http://exslt.org/strings"
+ xmlns:html="http://www.w3.org/1999/xhtml"
xmlns="http://www.w3.org/2005/Atom">
<xsl:output indent="yes"/>
<xsl:template match="/html">
@@ -8,16 +10,38 @@
<title>Recently Sent from NRK</title>
<id>urn:trygvis:2012,07,tv.nrk.no,recently-sent</id>
<xsl:for-each select="//li">
-<xsl:call-template name="li"/>
+ <xsl:call-template name="li"/>
</xsl:for-each>
</feed>
</xsl:template>
<xsl:template name="li">
<entry>
- <id><xsl:value-of select="a/@href"/></id>
+ <xsl:variable name='href' select='a/@href'/>
+ <xsl:variable name='filename' select='concat("tmp/", strings:encode-uri(normalize-space(a), true()), ".xml")'/>
+ <xsl:variable name='show' select='document($filename)'/>
+ <!--
+ <xsl:variable name='href' select='"heli-hogst.html"'/>
+ -->
+ <id><xsl:value-of select="$href"/></id>
<!--
<id>urn:trygvis:2012,07,tv.nrk.no,item,<xsl:value-of select='normalize-space(a)'/></id>
-->
+ <!--
+ /html/body/div/div/div/div/section/article/div/div/section[@id='information']
+ -->
+ <!--
+ href: <xsl:value-of select="$href"/>
+ filename: <xsl:value-of select="$filename"/>
+ -->
+ <content type="xhtml" xml:lang="en">
+ <div xmlns="http://www.w3.org/1999/xhtml">
+ <xsl:copy-of select="$show/html:html//html:section[@id='information']/*"/>
+ </div>
+ </content>
+ <!--
+ -->
+
+ <!--
<title><xsl:value-of select='normalize-space(a)'/></title>
<updated><xsl:value-of select="a/strong/time/@datetime"/></updated>
<author><name>Trygve Laugstøl</name></author>
@@ -27,6 +51,7 @@
<summary>
<xsl:value-of select="normalize-space(div[@class='stack-links'])"/>
</summary>
+ -->
</entry>
</xsl:template>
</xsl:stylesheet>
diff --git a/update-feed.sh b/update-feed.sh
index 4afd168..122db80 100755
--- a/update-feed.sh
+++ b/update-feed.sh
@@ -1,7 +1,6 @@
#!/bin/bash
set -e
-set -x
checksum() {
[ -r $1 ] && md5sum $1 | cut -f 1 -d ' '
@@ -22,4 +21,40 @@ fetch() {
fetch
-cat tv.nrk.no.html | xmlstarlet tr ./tv.nrk.no.xsl > tv.nrk.no.atom
+mkdir -p tmp
+
+cat tv.nrk.no.html | \
+ xmlstarlet sel -N strings=http://exslt.org/strings -t -m '//a[@class="listobject-link"]' \
+ -v '@href' -v '" "' \
+ -v 'strings:encode-uri(normalize-space(.), true())' -n \
+ | while read url name
+do
+ if [ "$name" == "" -o -r "$name" ]
+ then
+ continue
+ fi
+
+ echo Url: $url
+ echo Name: $name
+ if [ -r "tmp/$name" ]
+ then
+ z="tmp/$name"
+ else
+ z="Jan 1 1970"
+ fi
+
+ curl -v -s -z "$z" -L "$url" -o "tmp/$name"
+ rm -f "tmp/$name.html" "tmp/$name.xml"
+ set +e
+ cat "tmp/$name" | dos2unix | \
+ tidy -utf8 -asxhtml -quiet -f /dev/null \
+ --new-inline-tags "time" \
+ --new-blocklevel-tags "article, hgroup, section, header, footer, mark, aside" \
+ > "tmp/$name.html"
+ echo ret=$?
+ set -e
+ cat "tmp/$name.html" | xmllint --format --xmlout - >"tmp/${name}.xml"
+done
+
+cat tv.nrk.no.html | xmlstarlet tr ./tv.nrk.no.xsl | tee tv.nrk.no.atom
+#rm -rf tmp