diff options
-rw-r--r-- | atom.php | 106 |
1 files changed, 106 insertions, 0 deletions
<?php
/*
 * Downloads an Atom feed and enqueues each entry on the queue.
 *
 * Usage: php atom.php <feed-url> <queue-url> <state-file>
 *
 * Every <entry> whose timestamp (<updated>, falling back to <published>)
 * is newer than the timestamp stored in the state file is POSTed to the
 * queue URL as application/atom+xml. After all entries have been posted,
 * the newest timestamp seen is written back to the state file so the
 * next run can skip entries that were already enqueued.
 *
 * It will not follow 'next' relation.
 *
 * The original author notes that this implicitly assumes that the feed
 * is ordered with the newest entry first.
 *
 * Exit status: 0 on success, 1 on any failure. If the script stops while
 * posting, the state file is left untouched, so entries posted during
 * that run will be posted again by the next run.
 */

/*
 * Debug output hook. Output is switched off by the early return; remove
 * the return statement to get the debug trace on stdout.
 */
function debug($str) {
    return;
    echo($str);
}

/*
 * Prints $message followed by a newline and terminates with exit status 1.
 * (exit() with a string argument would terminate with status 0.)
 */
function fail($message) {
    echo $message, "\n";
    exit(1);
}

/*
 * POSTs one serialized Atom entry to the queue URL.
 *
 * $queue - URL to post to.
 * $xml   - the entry document, sent verbatim as the request body.
 *
 * Terminates the script through fail() on a transport error or on an
 * HTTP status >= 400. The response body, if any, is written to stdout
 * by cURL because CURLOPT_RETURNTRANSFER is not set.
 */
function post_entry($queue, $xml) {
    $curl = curl_init();
    if ($curl === false) {
        fail("Unable to allocate cURL object");
    }

    curl_setopt($curl, CURLOPT_URL, $queue);
    // CURLOPT_POST + CURLOPT_POSTFIELDS actually transmit the body.
    // CURLOPT_CUSTOMREQUEST only renames the method, and CURLOPT_INFILE is
    // ignored unless an upload is requested.
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $xml);
    // Make curl_exec() return false on HTTP status >= 400 so a rejected
    // entry is not recorded as enqueued.
    curl_setopt($curl, CURLOPT_FAILONERROR, true);
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("Content-Type: application/atom+xml;type=entry", "trygve: kul"));

    $ok = curl_exec($curl);
    $error = curl_error($curl);
    curl_close($curl);

    // if success, pick out Location headers.

    if ($ok === false) {
        fail("Error while executing cURL: $error");
    }
}

if (!isset($argv[3])) {
    $script = isset($argv[0]) ? $argv[0] : "atom.php";
    fail("Usage: php $script <feed-url> <queue-url> <state-file>");
}

$url = $argv[1];
$queue = $argv[2];
$state_file = $argv[3];

debug("Feed=$url\n");
debug("Queue=$queue\n");
debug("State=$state_file\n");

// Timestamp format used both for parsing feed/state timestamps and for
// writing the state file: UTC with a literal 'Z' suffix, whole seconds.
$format = "Y-m-d\\TH:i:s\\Z";
$timezone = new DateTimeZone("UTC");

// Timestamp saved by the previous run, or NULL when there is no usable state.
$state = NULL;
if (is_readable($state_file)) {
    $lines = file($state_file, FILE_IGNORE_NEW_LINES);
    $first_line = (is_array($lines) && isset($lines[0])) ? $lines[0] : "";
    $state = DateTime::createFromFormat($format, $first_line, $timezone);
    if (!$state) {
        echo("Unable to parse current state file.\n");
        print_r(DateTime::getLastErrors());
        // Continue as if no state existed: every entry counts as new.
        $state = NULL;
    }
}

// Newest entry timestamp posted during this run.
$newest = NULL;

libxml_use_internal_errors(true);
$sxe = simplexml_load_file($url);
// Compare against false explicitly: a valid but empty root element is
// falsy as a SimpleXMLElement and must not be reported as a load failure.
if ($sxe === false) {
    echo "Failed loading XML from $url\n";
    foreach (libxml_get_errors() as $error) {
        echo $error->message, "\n";
    }
    exit(1);
}

foreach ($sxe->entry as $entry) {
    // $entry->id is always an object, even when the element is absent,
    // so test its string value rather than using isset().
    $id = trim((string) $entry->id);
    if ($id === "") {
        echo "Invalid atom entry, missing <id> from <entry>.\n";
        continue;
    }

    // Prefer <updated>; fall back to <published> when it is missing or empty.
    $stamp = trim((string) $entry->updated);
    if ($stamp === "") {
        $stamp = trim((string) $entry->published);
    }

    $updated = DateTime::createFromFormat($format, $stamp, $timezone);
    if ($updated === FALSE) {
        echo "Invalid atom entry, missing/invalid <published> or <updated> from <entry>.\n";
        continue;
    }

    // Entries at or before the saved timestamp were handled by an earlier run.
    if ($state !== NULL && $state >= $updated) {
        continue;
    }

    echo "New entry: $id\n";

    if ($newest === NULL || $newest < $updated) {
        $newest = $updated;
    }

    $xml = $entry->asXML();
    if ($xml === false) {
        fail("Unable to write xml");
    }

    post_entry($queue, $xml);
}

if ($newest !== NULL) {
    if (file_put_contents($state_file, $newest->format($format) . "\n") === false) {
        fail("Unable to write state file $state_file");
    }
}