aboutsummaryrefslogtreecommitdiff
path: root/src/main
diff options
context:
space:
mode:
authorTrygve Laugstøl <trygvis@inamo.no>2012-11-09 15:26:55 +0100
committerTrygve Laugstøl <trygvis@inamo.no>2012-11-09 15:26:55 +0100
commita742500840276ec694a6d25230ee52c05b385661 (patch)
treeb02eebec6f385efa057feea15a17112e1f97e0d4 /src/main
parent8b3db6ef6307191609d2dab837032db16aa13375 (diff)
downloadesper-testing-a742500840276ec694a6d25230ee52c05b385661.tar.gz
esper-testing-a742500840276ec694a6d25230ee52c05b385661.tar.bz2
esper-testing-a742500840276ec694a6d25230ee52c05b385661.tar.xz
esper-testing-a742500840276ec694a6d25230ee52c05b385661.zip
wip
Diffstat (limited to 'src/main')
-rw-r--r--src/main/java/io/trygvis/esper/testing/gitorious/GitoriousAtomFeedParser.java167
-rw-r--r--src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java9
-rw-r--r--src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java38
-rw-r--r--src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java67
-rw-r--r--src/main/resources/ddl-core.sql20
-rw-r--r--src/main/resources/ddl-gitorious.sql43
-rw-r--r--src/main/resources/ddl.sql26
7 files changed, 290 insertions, 80 deletions
diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousAtomFeedParser.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousAtomFeedParser.java
new file mode 100644
index 0000000..7e0a1b7
--- /dev/null
+++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousAtomFeedParser.java
@@ -0,0 +1,167 @@
+package io.trygvis.esper.testing.gitorious;
+
+import fj.data.*;
+import org.apache.abdera.*;
+import org.apache.abdera.model.*;
+import org.apache.abdera.model.Document;
+import org.apache.abdera.model.Element;
+import org.apache.abdera.parser.*;
+import org.dom4j.*;
+import org.dom4j.io.*;
+
+import java.io.*;
+import java.util.*;
+import java.util.List;
+import java.util.regex.*;
+
+/**
+ * Turns a Gitorious Atom feed into a list of {@link GitoriousEvent} objects.
+ * <p>
+ * The feed is read with the Abdera parser; the markup inside each entry's content is wrapped
+ * in a root element and re-parsed with dom4j to extract who did what.
+ */
+public class GitoriousAtomFeedParser {
+    public final Parser parser;
+    public static final STAXEventReader xmlReader = new STAXEventReader();
+
+    // Replaces a "<p>" found at the end of a line with "</p>" before the content is parsed as XML.
+    private static final Pattern pPatternFixer = Pattern.compile("<p>$", Pattern.MULTILINE);
+    // Captures everything after the last '/' of a link.
+    private static final Pattern branchPattern = Pattern.compile(".*/(.*)$");
+    // Captures the two hex ids of a ".../commit/<from>/diffs/<to>" link.
+    private static final Pattern fromToPattern = Pattern.compile(".*/commit/([0-9a-f]*)/diffs/([0-9a-f]*)");
+    // Captures the number in front of "commit" / "commits".
+    private static final Pattern commitCountPattern = Pattern.compile("([0-9]*) commit[s]?");
+
+    public GitoriousAtomFeedParser() {
+        Abdera abdera = new Abdera();
+        parser = abdera.getParser();
+    }
+
+    /**
+     * Parses an Atom feed and returns the events it contains.
+     * <p>
+     * Entries without a published date, entries published before {@code lastUpdate} and
+     * entries that cannot be turned into an event are left out.
+     *
+     * @param stream      the raw Atom document
+     * @param lastUpdate  when set, entries published before this instant are skipped
+     * @param projectSlug project slug copied into every returned event
+     * @param name        repository name copied into every returned event
+     * @return the recognised events, in feed order; never {@code null}
+     */
+    public List<GitoriousEvent> parseStream(InputStream stream, Option<Date> lastUpdate, String projectSlug, String name) {
+        Document<Element> document = parser.parse(stream);
+
+        Feed feed = (Feed) document.getRoot();
+
+        List<GitoriousEvent> events = new ArrayList<>();
+
+        for (Entry entry : feed.getEntries()) {
+            Date published = entry.getPublished();
+
+            // Check if it's old
+            if (published == null || (lastUpdate.isSome() && lastUpdate.some().after(published))) {
+                continue;
+            }
+
+            GitoriousEvent event = parseEntry(projectSlug, name, entry);
+
+            if (event != null) {
+                events.add(event);
+            }
+        }
+
+        return events;
+    }
+
+    /**
+     * Converts a single feed entry into an event.
+     *
+     * @return the event, or {@code null} when the entry is incomplete, is of a kind that is
+     *         deliberately ignored, or cannot be processed
+     */
+    private static GitoriousEvent parseEntry(String projectSlug, String name, Entry entry) {
+        Date published = entry.getPublished();
+        String title = entry.getTitle();
+        String text = entry.getContent();
+
+        // Validate element. The id is tested before it is dereferenced, and the content before
+        // it is handed to the regex, so an incomplete entry is skipped instead of aborting the
+        // whole feed with a NullPointerException.
+        if (entry.getId() == null || published == null || title == null || text == null) {
+            return null;
+        }
+
+        String entryId = entry.getId().toASCIIString();
+
+        text = pPatternFixer.matcher(text).replaceFirst("</p>");
+
+        // Wrap the fragment in a single root element that declares the gts prefix.
+        String xml = "<p xmlns:gts='urn:gitorious'>" + text + "</p>";
+        try {
+            org.dom4j.Element content = xmlReader.readDocument(new StringReader(xml)).getRootElement();
+
+            List<org.dom4j.Element> elements = elements(content);
+            List<Node> nodes = nodes(elements.get(0));
+
+            String who = nodes.get(0).getText();
+
+            String event = nodes.get(1).getText().trim();
+            switch (event) {
+                case "created repository":
+                case "created branch":
+                    // This is similar "pushed", but doesn't contain any info on commit IDs or branches
+                case "started development of":
+                    return null;
+                case "pushed":
+                    org.dom4j.Element two = (org.dom4j.Element) nodes.get(2);
+                    org.dom4j.Element six = (org.dom4j.Element) nodes.get(6);
+
+                    Matcher branchMatcher = branchPattern.matcher(two.attributeValue("href"));
+                    Matcher fromToMatcher = fromToPattern.matcher(six.attributeValue("href"));
+                    if (!branchMatcher.matches() || !fromToMatcher.matches()) {
+                        System.out.println("Could not process: " + xml);
+                        return null;
+                    }
+
+                    String branch = branchMatcher.group(1);
+                    String from = fromToMatcher.group(1);
+                    String to = fromToMatcher.group(2);
+
+                    // Group references in a Java replacement string are written "$1"; "\\1" would
+                    // insert a literal '1' and make every push report exactly one commit.
+                    int commitCount = Integer.parseInt(commitCountPattern.matcher(two.getText()).replaceFirst("$1"));
+                    return new GitoriousPush(projectSlug, name, entryId, published, title, text, who, from, to, branch, commitCount);
+                default:
+                    System.out.println("Unknown event: " + event);
+                    return null;
+            }
+        } catch (Exception e) {
+            // Best effort: an entry whose markup does not have the expected shape is reported and skipped.
+            System.out.println("Could not process: " + xml);
+            return null;
+        }
+    }
+
+    /** Copies the child nodes (elements and text alike) of {@code element} into a list. */
+    private static List<Node> nodes(org.dom4j.Element element) {
+        List<Node> nodes = new ArrayList<>(element.nodeCount());
+
+        @SuppressWarnings("unchecked") Iterator<Node> iterator = element.nodeIterator();
+        while (iterator.hasNext()) {
+            nodes.add(iterator.next());
+        }
+        return nodes;
+    }
+
+    /** Copies the child elements of {@code content} into a list. */
+    private static List<org.dom4j.Element> elements(org.dom4j.Element content) {
+        List<org.dom4j.Element> elements = new ArrayList<>();
+
+        @SuppressWarnings("unchecked") Iterator<org.dom4j.Element> iterator = content.elementIterator();
+        while (iterator.hasNext()) {
+            elements.add(iterator.next());
+        }
+        return elements;
+    }
+}
+
+/**
+ * Base type for an event read from a Gitorious Atom feed entry.
+ * Holds the values common to every kind of event; each field is assigned once by the constructor.
+ */
+abstract class GitoriousEvent {
+ // Project slug and repository name the feed belongs to, as supplied by the caller of the parser.
+ public final String projectSlug;
+ public final String name;
+ // Id of the Atom entry in ASCII form.
+ public final String entryId;
+ // Publication date of the Atom entry.
+ public final Date published;
+ // Title of the Atom entry.
+ public final String title;
+ // Content of the Atom entry.
+ public final String content;
+ // Text of the first node of the entry content.
+ public final String who;
+
+ /**
+ * Stores the given values as-is; no validation or copying is performed.
+ */
+ protected GitoriousEvent(String projectSlug, String name, String entryId, Date published, String title, String content, String who) {
+ this.projectSlug = projectSlug;
+ this.name = name;
+ this.entryId = entryId;
+ this.published = published;
+ this.title = title;
+ this.content = content;
+ this.who = who;
+ }
+}
+
+/**
+ * Event created by the feed parser for an entry whose action text is "pushed".
+ */
+class GitoriousPush extends GitoriousEvent {
+ // The two hex ids captured from the ".../commit/<from>/diffs/<to>" link of the entry.
+ public final String from;
+ public final String to;
+ // Text after the last '/' of the branch link.
+ public final String branch;
+ // Number of commits as parsed from the entry.
+ public final int commitCount;
+
+ /**
+ * Passes the common values to {@link GitoriousEvent} and stores the push-specific ones as-is.
+ */
+ GitoriousPush(String projectSlug, String name, String entryId, Date published, String title, String content, String who, String from, String to, String branch, int commitCount) {
+ super(projectSlug, name, entryId, published, title, content, who);
+ this.from = from;
+ this.to = to;
+ this.branch = branch;
+ this.commitCount = commitCount;
+ }
+}
diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java
index a58f1de..892d8d0 100644
--- a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java
+++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java
@@ -63,8 +63,8 @@ public class GitoriousClient {
return all;
}
- public URI atomFeed(String slug) {
- return URI.create(baseUrl + "/" + slug + ".atom");
+ public URI atomFeed(String projectSlug, String repositoryName) {
+ return URI.create(baseUrl + "/" + projectSlug + "/" + repositoryName + ".atom");
}
}
@@ -97,7 +97,7 @@ class GitoriousProjectXml implements Comparable<GitoriousProjectXml> {
return null;
}
- List<Element> list = (List<Element>) mainlines.elements("repository");
+ @SuppressWarnings("unchecked") List<Element> list = (List<Element>) mainlines.elements("repository");
List<GitoriousRepositoryXml> repositoryList = new ArrayList<>(list.size());
for (Element repository : list) {
GitoriousRepositoryXml r = GitoriousRepositoryXml.fromXml(slug, repository);
@@ -114,7 +114,8 @@ class GitoriousProjectXml implements Comparable<GitoriousProjectXml> {
public static List<GitoriousProjectXml> projectsFromXml(Element root) throws URISyntaxException {
List<GitoriousProjectXml> projects = new ArrayList<>();
- for (Element project : (List<Element>) root.elements("project")) {
+ @SuppressWarnings("unchecked") List<Element> elements = (List<Element>) root.elements("project");
+ for (Element project : elements) {
GitoriousProjectXml p = GitoriousProjectXml.fromXml(project);
if (p == null) {
diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java
index 603609e..93f31a5 100644
--- a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java
+++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java
@@ -2,26 +2,44 @@ package io.trygvis.esper.testing.gitorious;
import java.sql.*;
-public class GitoriousEventDao {
- private final PreparedStatement countEntryId;
- private final PreparedStatement insertChange;
+public class GitoriousEventDao extends Dao {
public GitoriousEventDao(Connection c) throws SQLException {
- countEntryId = c.prepareStatement("SELECT count(entry_id) FROM gitorious_event WHERE entry_id=?");
- insertChange = c.prepareStatement("INSERT INTO gitorious_event(entry_id, text) VALUES(?, ?)");
+ super(c);
}
+ private final PreparedStatement countEntryId = prepareStatement("SELECT count(entry_id) FROM gitorious_event WHERE entry_id=?");
+
public int countEntryId(String entryId) throws SQLException {
countEntryId.setString(1, entryId);
- try(ResultSet rs = countEntryId.executeQuery()) {
+ try (ResultSet rs = countEntryId.executeQuery()) {
rs.next();
return rs.getInt(1);
}
}
- public void insertChange(String entryId, String text) throws SQLException {
- insertChange.setString(1, entryId);
- insertChange.setString(2, text);
- insertChange.executeUpdate();
+ private final PreparedStatement insertPush = prepareStatement("INSERT INTO gitorious_event(project_slug, name, entry_id, published, title, content, event_type, who, \"from\", \"to\", branch, commit_count) VALUES(?, ?, ?, ?, ?, ?, 'PUSH', ?, ?, ?, ?, ?)");
+
+ /**
+ * Inserts one event row, choosing the prepared statement by the event's concrete type.
+ * Only {@link GitoriousPush} is handled: parameters 7-11 receive the push-specific values
+ * and parameters 1-6 the values common to all events.
+ *
+ * @param event the event to store
+ * @throws SQLException if the event is of an unknown type or the insert fails
+ */
+ public void insertEvent(GitoriousEvent event) throws SQLException {
+ PreparedStatement s;
+ if (event instanceof GitoriousPush) {
+ GitoriousPush push = (GitoriousPush) event;
+ s = insertPush;
+ s.setString(7, push.who);
+ s.setString(8, push.from);
+ s.setString(9, push.to);
+ s.setString(10, push.branch);
+ s.setInt(11, push.commitCount);
+ } else {
+ throw new SQLException("Unknown event type: " + event.getClass().getName());
+ }
+
+ // Common columns, bound on whichever statement was selected above.
+ s.setString(1, event.projectSlug);
+ s.setString(2, event.name);
+ s.setString(3, event.entryId);
+ // NOTE(review): dateToTimestamp is not declared in this hunk; presumably inherited from Dao - confirm.
+ s.setTimestamp(4, dateToTimestamp.f(event.published));
+ s.setString(5, event.title);
+ s.setString(6, event.content);
+ s.executeUpdate();
}
}
diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java
index 4cd0916..4ee6322 100644
--- a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java
+++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java
@@ -5,14 +5,13 @@ import fj.data.*;
import static fj.data.Option.*;
import io.trygvis.esper.testing.*;
import static java.lang.System.*;
-import org.apache.abdera.*;
-import org.apache.abdera.model.*;
import org.apache.abdera.parser.*;
import org.codehaus.httpcache4j.*;
import org.codehaus.httpcache4j.cache.*;
import org.codehaus.httpcache4j.client.*;
import java.io.*;
+import java.net.*;
import java.sql.*;
import java.util.Date;
import java.util.*;
@@ -21,7 +20,7 @@ import java.util.Set;
import java.util.concurrent.*;
public class GitoriousImporter {
- private final Parser parser;
+ private final GitoriousAtomFeedParser parser;
private final BoneCP boneCp;
private final GitoriousClient gitoriousClient;
private final HTTPCache httpCache;
@@ -32,8 +31,7 @@ public class GitoriousImporter {
}
public GitoriousImporter(String jdbcUrl, String jdbcUsername, String jdbcPassword) throws Exception {
- Abdera abdera = new Abdera();
- parser = abdera.getParser();
+ parser = new GitoriousAtomFeedParser();
BoneCPConfig config = new BoneCPConfig();
config.setJdbcUrl(jdbcUrl);
@@ -48,22 +46,25 @@ public class GitoriousImporter {
gitoriousClient = new GitoriousClient(httpCache, "http://gitorious.org");
- final ScheduledThreadPoolExecutor service = new ScheduledThreadPoolExecutor(1);
+ final ScheduledThreadPoolExecutor service = new ScheduledThreadPoolExecutor(2);
+ boolean projectsUpdateEnabled = false;
int projectsUpdateDelay = 0 * 1000;
int projectsUpdateInterval = 60 * 1000;
int repositoriesUpdateDelay = 0;
int repositoriesUpdateInterval = 60 * 1000;
- service.scheduleAtFixedRate(new Runnable() {
- public void run() {
- try {
- discoverProjects();
- } catch (Exception e) {
- e.printStackTrace(System.out);
+ if (projectsUpdateEnabled) {
+ service.scheduleAtFixedRate(new Runnable() {
+ public void run() {
+ try {
+ discoverProjects();
+ } catch (Exception e) {
+ e.printStackTrace(System.out);
+ }
}
- }
- }, projectsUpdateDelay, projectsUpdateInterval, TimeUnit.MILLISECONDS);
+ }, projectsUpdateDelay, projectsUpdateInterval, TimeUnit.MILLISECONDS);
+ }
service.scheduleAtFixedRate(new Runnable() {
public void run() {
@@ -90,13 +91,15 @@ public class GitoriousImporter {
System.out.println("New project: " + project.slug + ", has " + project.repositories.size() + " repositories.");
projectDao.insertProject(project.slug);
for (GitoriousRepositoryXml repository : project.repositories) {
- repoDao.insertRepository(repository.projectSlug, repository.name, gitoriousClient.atomFeed(project.slug));
+ URI atomFeed = gitoriousClient.atomFeed(repository.projectSlug, repository.name);
+ repoDao.insertRepository(repository.projectSlug, repository.name, atomFeed);
}
} else {
for (GitoriousRepositoryXml repository : project.repositories) {
if (repoDao.countRepositories(repository.projectSlug, repository.name) == 0) {
System.out.println("New repository for project " + repository.projectSlug + ": " + repository.name);
- repoDao.insertRepository(repository.projectSlug, repository.name, gitoriousClient.atomFeed(project.slug));
+ URI atomFeed = gitoriousClient.atomFeed(repository.projectSlug, repository.name);
+ repoDao.insertRepository(repository.projectSlug, repository.name, atomFeed);
}
}
@@ -160,7 +163,7 @@ public class GitoriousImporter {
GitoriousRepositoryDao repositoryDao = daos.gitoriousRepositoryDao;
GitoriousEventDao eventDao = daos.gitoriousEventDao;
- Option<Date> lastUpdate = repository.lastUpdate;
+ Option<Date> lastUpdate = repository.lastSuccessfulUpdate;
System.out.println("Fetching " + repository.atomFeed);
@@ -174,9 +177,9 @@ public class GitoriousImporter {
System.out.println("responseDate = " + responseDate);
- Document<Element> document;
+ List<GitoriousEvent> events;
try {
- document = parser.parse(response.getPayload().getInputStream());
+ events = parser.parseStream(response.getPayload().getInputStream(), lastUpdate, repository.projectSlug, repository.name);
} catch (ParseException e) {
repositoryDao.updateTimestamp(repository.projectSlug, repository.name, new Timestamp(currentTimeMillis()), Option.<Date>none());
System.out.println("Error parsing " + repository.atomFeed);
@@ -184,28 +187,12 @@ public class GitoriousImporter {
return;
}
- Feed feed = (Feed) document.getRoot();
-
- for (Entry entry : feed.getEntries()) {
- String entryId = entry.getId().toASCIIString();
- Date published = entry.getPublished();
- String title = entry.getTitle();
-
- // Validate element
- if (entryId == null || published == null || title == null) {
- continue;
- }
-
- if (lastUpdate.isSome() && lastUpdate.some().after(published)) {
- System.out.println("Old entry: " + repository.atomFeed + ":" + entryId);
- continue;
- }
-
- if (eventDao.countEntryId(entryId) == 0) {
- System.out.println("New entry: " + repository.atomFeed + ":" + entryId);
- eventDao.insertChange(entryId, title);
+ for (GitoriousEvent event : events) {
+ if (eventDao.countEntryId(event.entryId) == 0) {
+ System.out.println("New entry in " + repository.atomFeed + ": " + event.entryId);
+ eventDao.insertEvent(event);
} else {
- System.out.println("Already imported entry: " + entryId);
+ System.out.println("Already imported entry: " + event.entryId);
}
}
diff --git a/src/main/resources/ddl-core.sql b/src/main/resources/ddl-core.sql
new file mode 100644
index 0000000..662ed10
--- /dev/null
+++ b/src/main/resources/ddl-core.sql
@@ -0,0 +1,20 @@
+-- Core schema: subscribers and their subscriptions to Gitorious repositories.
+-- Both tables are dropped and recreated inside one transaction, then one subscriber
+-- and one subscription are inserted.
+BEGIN;
+
+-- The referencing table is dropped before the table it references.
+DROP TABLE IF EXISTS subscription_gitorious_repository;
+DROP TABLE IF EXISTS subscriber;
+
+CREATE TABLE subscriber (
+ name VARCHAR(100) PRIMARY KEY
+);
+
+-- NOTE(review): the slug/name columns are VARCHAR(100) here but VARCHAR(1000) in
+-- gitorious_repository (ddl-gitorious.sql) - confirm the difference is intended.
+CREATE TABLE subscription_gitorious_repository (
+ subscriber_name VARCHAR(100) REFERENCES subscriber (name),
+ gitorious_repository_project_slug VARCHAR(100),
+ gitorious_repository_name VARCHAR(100)
+-- The foreign key to gitorious_repository is disabled below; presumably because that table
+-- is created by a separate script - confirm.
+-- CONSTRAINT subscription_gitorious_repository_2_gitorious_repository FOREIGN KEY (gitorious_repository_project_slug, gitorious_repository_name) REFERENCES gitorious_repository (project_slug, name)
+);
+
+INSERT INTO subscriber VALUES ('trygvis');
+INSERT INTO subscription_gitorious_repository VALUES ('trygvis','esper-test-project', 'esper-test-project');
+
+COMMIT;
diff --git a/src/main/resources/ddl-gitorious.sql b/src/main/resources/ddl-gitorious.sql
new file mode 100644
index 0000000..929a326
--- /dev/null
+++ b/src/main/resources/ddl-gitorious.sql
@@ -0,0 +1,43 @@
+-- Gitorious schema: projects, their repositories and the events imported from the
+-- repositories' Atom feeds. All three tables are dropped and recreated inside one
+-- transaction, then one project and one repository are inserted.
+BEGIN;
+
+-- gitorious_repository is dropped before gitorious_project, which it references.
+DROP TABLE IF EXISTS gitorious_event;
+DROP TABLE IF EXISTS gitorious_repository;
+DROP TABLE IF EXISTS gitorious_project;
+
+CREATE TABLE gitorious_project (
+ slug VARCHAR(1000) PRIMARY KEY
+);
+
+-- One row per repository, identified by (project_slug, name).
+CREATE TABLE gitorious_repository (
+ project_slug VARCHAR(1000) NOT NULL,
+ name VARCHAR(1000) NOT NULL,
+ atom_feed VARCHAR(1000) NOT NULL,
+ last_update TIMESTAMP,
+ last_successful_update TIMESTAMP,
+ CONSTRAINT gitorious_repository_pk PRIMARY KEY (project_slug, name),
+ CONSTRAINT gitorious_repository_2_gitorious_project FOREIGN KEY (project_slug) REFERENCES gitorious_project (slug)
+);
+
+-- One row per imported feed entry, keyed by the Atom entry id.
+-- No foreign key to gitorious_repository is declared for (project_slug, name).
+CREATE TABLE gitorious_event (
+ project_slug VARCHAR(1000) NOT NULL,
+ name VARCHAR(1000) NOT NULL,
+
+-- The raw values for debugging
+ entry_id VARCHAR(1000) PRIMARY KEY,
+ published TIMESTAMP NOT NULL,
+ title VARCHAR(1000),
+ content VARCHAR(1000),
+
+ event_type VARCHAR(20),
+ who VARCHAR(100),
+-- Push
+-- "from" and "to" are quoted because they are SQL keywords.
+-- NOTE(review): CHAR(40) presumably holds full SHA-1 commit ids - confirm.
+ "from" CHAR(40),
+ "to" CHAR(40),
+ branch VARCHAR(100),
+ commit_count INTEGER
+);
+
+-- Seed data; the repository's timestamp columns are not listed and so stay NULL.
+INSERT INTO gitorious_project VALUES ('esper-test-project');
+INSERT INTO gitorious_repository VALUES ('esper-test-project', 'esper-test-project', 'https://gitorious.org/esper-test-project/esper-test-project.atom');
+
+COMMIT;
diff --git a/src/main/resources/ddl.sql b/src/main/resources/ddl.sql
deleted file mode 100644
index 4e641cc..0000000
--- a/src/main/resources/ddl.sql
+++ /dev/null
@@ -1,26 +0,0 @@
-BEGIN;
-
-DROP TABLE IF EXISTS gitorious_event;
-DROP TABLE IF EXISTS gitorious_repository;
-DROP TABLE IF EXISTS gitorious_project;
-
-CREATE TABLE gitorious_project (
- slug VARCHAR(1000) PRIMARY KEY
-);
-
-CREATE TABLE gitorious_repository (
- project_slug VARCHAR(1000) NOT NULL,
- name VARCHAR(1000) NOT NULL,
- atom_feed VARCHAR(1000) NOT NULL,
- last_update TIMESTAMP,
- last_successful_update TIMESTAMP,
- CONSTRAINT gitorious_repository_pk PRIMARY KEY (project_slug, name),
- CONSTRAINT gitorious_repository_2_gitorious_project FOREIGN KEY (project_slug) REFERENCES gitorious_project (slug)
-);
-
-CREATE TABLE gitorious_event (
- entry_id VARCHAR(1000) PRIMARY KEY,
- text VARCHAR(1000)
-);
-
-COMMIT;