summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTrygve Laugstøl <trygvis@inamo.no>2012-03-18 20:55:28 +0000
committerTrygve Laugstøl <trygvis@inamo.no>2012-03-18 20:55:28 +0000
commit7bd0049e355492e85339c9efa88bf6f54e392110 (patch)
tree925ad09c73b3d120892425c6c5cfbdd45757db58
parentc4ea24c6ee86c2e6544aff08197d1ec2a162aef9 (diff)
downloadhttp-mq-7bd0049e355492e85339c9efa88bf6f54e392110.tar.gz
http-mq-7bd0049e355492e85339c9efa88bf6f54e392110.tar.bz2
http-mq-7bd0049e355492e85339c9efa88bf6f54e392110.tar.xz
http-mq-7bd0049e355492e85339c9efa88bf6f54e392110.zip
o Adding a utility script to read an atom feed and enqueue each entry.HEADmaster
-rw-r--r--atom.php106
1 files changed, 106 insertions, 0 deletions
diff --git a/atom.php b/atom.php
new file mode 100644
index 0000000..dd93725
--- /dev/null
+++ b/atom.php
@@ -0,0 +1,106 @@
+<?php
+/*
+ * Downloads an Atom feed and enqueues each entry on the queue.
+ *
+ * Usage: php atom.php <feed-url> <queue-url> <state-file>
+ *
+ * Saves the timestamp of the newest enqueued entry in the state file and
+ * uses it on the next run to skip entries that were already enqueued.
+ *
+ * It does not follow the 'next' link relation, and it implicitly assumes
+ * that the feed is ordered with the newest entry first.
+ */
+
+// Arguments: feed URL to poll, queue URL to POST to, file holding the last-seen timestamp.
+count($argv) >= 4 or exit("Usage: php atom.php <feed-url> <queue-url> <state-file>\n");
+list(, $url, $queue, $state_file) = $argv;
+
+function debug($str) {
+    // Trace output is opt-in: nothing is printed unless ATOM_DEBUG is set in the environment.
+    if(getenv("ATOM_DEBUG")) { echo($str); }
+}
+
+foreach(array("Feed" => $url, "Queue" => $queue, "State" => $state_file) as $label => $setting) {
+    debug("$label=$setting\n"); // one "Label=value" line per command-line setting
+}
+
+$format = "Y-m-d\\TH:i:s\\Z"; // the one timestamp form read and written, e.g. 2012-03-18T20:55:28Z
+$timezone = new DateTimeZone("UTC");
+$state = NULL; // timestamp saved by the previous run; stays NULL when absent or unparsable
+if(is_readable($state_file)) {
+    $lines = file($state_file, FILE_IGNORE_NEW_LINES);
+    $state = DateTime::createFromFormat($format, isset($lines[0]) ? trim($lines[0]) : "", $timezone);
+    if(!$state) {
+        echo("Unable to parse current state file.\n");
+        print_r(DateTime::getLastErrors());
+        $state = NULL;
+    }
+}
+$newest = NULL; // newest timestamp among the entries enqueued during this run
+libxml_use_internal_errors(true);
+$sxe = simplexml_load_file($url);
+// Use === here: a root element with no children or attributes is a valid object that casts to false.
+if ($sxe === false) {
+    echo "Failed loading XML from $url\n";
+    foreach(libxml_get_errors() as $error) {
+        echo $error->message, "\n";
+    }
+    exit(1);
+}
+
+// Enqueue every entry newer than the saved state by POSTing its XML to the queue.
+foreach($sxe->entry as $entry) {
+    // Read child elements as trimmed text; an element that is absent reads as "".
+    $id = trim((string)$entry->id);
+    $stamp = trim((string)$entry->updated);
+    if($stamp === "") {
+        // No usable <updated>: fall back to <published>.
+        $stamp = trim((string)$entry->published);
+    }
+
+    // Test the text, not isset(): a variable assigned from $entry->id is always set.
+    if($id === "") {
+        echo "Invalid atom entry, missing <id> from <entry>.\n";
+        continue;
+    }
+
+    $updated = DateTime::createFromFormat($format, $stamp, $timezone);
+    if($updated === FALSE) {
+        echo "Invalid atom entry, missing/invalid <published> or <updated> from <entry>.\n";
+        continue;
+    }
+
+    // Not newer than the timestamp saved by the previous run: skip it.
+    if($state && $state >= $updated) {
+        continue;
+    }
+
+    echo "New entry: $id\n";
+
+    // Track the newest timestamp seen; it is what gets written to the state file.
+    if($newest === NULL || $newest < $updated) {
+        $newest = $updated;
+    }
+
+    $curl = curl_init() or exit("Unable to allocate cURL object");
+    curl_setopt($curl, CURLOPT_URL, $queue);
+    // The body must go in CURLOPT_POSTFIELDS; CURLOPT_INFILE is only read for
+    // uploads, so combining it with a custom "POST" verb sends no body at all.
+    curl_setopt($curl, CURLOPT_POST, TRUE);
+    curl_setopt($curl, CURLOPT_POSTFIELDS, $entry->asXML());
+    curl_setopt($curl, CURLOPT_HTTPHEADER, array("Content-Type: application/atom+xml;type=entry", "trygve: kul"));
+    // Make HTTP status >= 400 count as a failure rather than as an enqueued entry.
+    curl_setopt($curl, CURLOPT_FAILONERROR, TRUE);
+    // exit() leaves the state file untouched, so the next run retries these entries.
+    curl_exec($curl) or exit("Error while executing cURL: " . curl_error($curl) . "\n");
+
+    // if success, pick out Location headers.
+
+    curl_close($curl);
+}
+
+// Persist the newest enqueued timestamp so the next run skips entries up to and including it.
+if($newest !== NULL) {
+    file_put_contents($state_file, $newest->format($format) . "\n") !== FALSE
+        or exit("Unable to write state file $state_file\n");
+}