From a742500840276ec694a6d25230ee52c05b385661 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Fri, 9 Nov 2012 15:26:55 +0100 Subject: wip --- .../testing/gitorious/GitoriousAtomFeedParser.java | 167 +++++++++++++++++++++ .../esper/testing/gitorious/GitoriousClient.java | 9 +- .../esper/testing/gitorious/GitoriousEventDao.java | 38 +++-- .../esper/testing/gitorious/GitoriousImporter.java | 67 ++++----- 4 files changed, 227 insertions(+), 54 deletions(-) create mode 100644 src/main/java/io/trygvis/esper/testing/gitorious/GitoriousAtomFeedParser.java (limited to 'src/main/java/io') diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousAtomFeedParser.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousAtomFeedParser.java new file mode 100644 index 0000000..7e0a1b7 --- /dev/null +++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousAtomFeedParser.java @@ -0,0 +1,167 @@ +package io.trygvis.esper.testing.gitorious; + +import fj.data.*; +import org.apache.abdera.*; +import org.apache.abdera.model.*; +import org.apache.abdera.model.Document; +import org.apache.abdera.model.Element; +import org.apache.abdera.parser.*; +import org.dom4j.*; +import org.dom4j.io.*; + +import java.io.*; +import java.util.*; +import java.util.List; +import java.util.regex.*; + +public class GitoriousAtomFeedParser { + public final Parser parser; + public static final STAXEventReader xmlReader = new STAXEventReader(); + + public GitoriousAtomFeedParser() { + Abdera abdera = new Abdera(); + parser = abdera.getParser(); + } + + public List parseStream(InputStream stream, Option lastUpdate, String projectSlug, String name) { + Document document = parser.parse(stream); + + Feed feed = (Feed) document.getRoot(); + + List events = new ArrayList<>(); + + for (Entry entry : feed.getEntries()) { + Date published = entry.getPublished(); + + // Check if it's old + if (published == null || lastUpdate.isSome() && lastUpdate.some().after(published)) { + continue; + } + + GitoriousEvent event = parseEntry(projectSlug, name, entry); + + if (event == null) { + continue; + } + + events.add(event); + } + + return events; + } + + private static Pattern pPatternFixer = Pattern.compile("

$", Pattern.MULTILINE); + private static Pattern branchPattern = Pattern.compile(".*/(.*)$"); + private static Pattern fromToPattern = Pattern.compile(".*/commit/([0-9a-f]*)/diffs/([0-9a-f]*)"); + + private static GitoriousEvent parseEntry(String projectSlug, String name, Entry entry) { + String entryId = entry.getId().toASCIIString(); + Date published = entry.getPublished(); + String title = entry.getTitle(); + + // Validate element + if (entryId == null || published == null || title == null) { + return null; + } + + String text = entry.getContent(); + + text = pPatternFixer.matcher(text).replaceFirst("

"); + + org.dom4j.Element content; + String xml = "

" + text + "

"; + try { + content = xmlReader.readDocument(new StringReader(xml)).getRootElement(); + + List elements = elements(content); + List nodes = nodes(elements.get(0)); + + String who = nodes.get(0).getText(); + + String event = nodes.get(1).getText().trim(); + switch (event) { + case "created repository": + case "created branch": + // This is similar "pushed", but doesn't contain any info on commit IDs or branches + case "started development of": + return null; + case "pushed": + org.dom4j.Element two = (org.dom4j.Element) nodes.get(2); + org.dom4j.Element six = (org.dom4j.Element) nodes.get(6); + + Matcher branchMatcher = branchPattern.matcher(two.attributeValue("href")); + branchMatcher.matches(); + String branch = branchMatcher.group(1); + + String href = six.attributeValue("href"); + Matcher matcher = fromToPattern.matcher(href); + matcher.matches(); + String from = matcher.group(1); + String to = matcher.group(2); + int commitCount = Integer.parseInt(two.getText().replaceFirst("([0-9]*) commit[s]?", "\\1")); + return new GitoriousPush(projectSlug, name, entryId, published, title, text, who, from, to, branch, commitCount); + default: + System.out.println("Unknown event: " + event); + return null; + } + } catch (Exception e) { + System.out.println("Could not process: " + xml); + return null; + } + } + + private static List nodes(org.dom4j.Element element) { + List nodes = new ArrayList<>(element.nodeCount()); + + @SuppressWarnings("unchecked") Iterator iterator = element.nodeIterator(); + while (iterator.hasNext()) { + nodes.add(iterator.next()); + } + return nodes; + } + + private static List elements(org.dom4j.Element content) { + List elements = new ArrayList<>(); + + @SuppressWarnings("unchecked") Iterator iterator = content.elementIterator(); + while (iterator.hasNext()) { + elements.add(iterator.next()); + } + return elements; + } +} + +abstract class GitoriousEvent { + public final String projectSlug; + public final String name; + public final String entryId; + public final Date published; + public final String title; + public final String content; + public final String who; + + protected GitoriousEvent(String projectSlug, String name, String entryId, Date published, String title, String content, String who) { + this.projectSlug = projectSlug; + this.name = name; + this.entryId = entryId; + this.published = published; + this.title = title; + this.content = content; + this.who = who; + } +} + +class GitoriousPush extends GitoriousEvent { + public final String from; + public final String to; + public final String branch; + public final int commitCount; + + GitoriousPush(String projectSlug, String name, String entryId, Date published, String title, String content, String who, String from, String to, String branch, int commitCount) { + super(projectSlug, name, entryId, published, title, content, who); + this.from = from; + this.to = to; + this.branch = branch; + this.commitCount = commitCount; + } +} diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java index a58f1de..892d8d0 100644 --- a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java +++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java @@ -63,8 +63,8 @@ public class GitoriousClient { return all; } - public URI atomFeed(String slug) { - return URI.create(baseUrl + "/" + slug + ".atom"); + public URI atomFeed(String projectSlug, String repositoryName) { + return URI.create(baseUrl + "/" + projectSlug + "/" + repositoryName + ".atom"); } } @@ -97,7 +97,7 @@ class GitoriousProjectXml implements Comparable { return null; } - List list = (List) mainlines.elements("repository"); + @SuppressWarnings("unchecked") List list = (List) mainlines.elements("repository"); List repositoryList = new ArrayList<>(list.size()); for (Element repository : list) { GitoriousRepositoryXml r = GitoriousRepositoryXml.fromXml(slug, repository); @@ -114,7 +114,8 @@ class GitoriousProjectXml implements Comparable { public static List projectsFromXml(Element root) throws URISyntaxException { List projects = new ArrayList<>(); - for (Element project : (List) root.elements("project")) { + @SuppressWarnings("unchecked") List elements = (List) root.elements("project"); + for (Element project : elements) { GitoriousProjectXml p = GitoriousProjectXml.fromXml(project); if (p == null) { diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java index 603609e..93f31a5 100644 --- a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java +++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousEventDao.java @@ -2,26 +2,44 @@ package io.trygvis.esper.testing.gitorious; import java.sql.*; -public class GitoriousEventDao { - private final PreparedStatement countEntryId; - private final PreparedStatement insertChange; +public class GitoriousEventDao extends Dao { public GitoriousEventDao(Connection c) throws SQLException { - countEntryId = c.prepareStatement("SELECT count(entry_id) FROM gitorious_event WHERE entry_id=?"); - insertChange = c.prepareStatement("INSERT INTO gitorious_event(entry_id, text) VALUES(?, ?)"); + super(c); } + private final PreparedStatement countEntryId = prepareStatement("SELECT count(entry_id) FROM gitorious_event WHERE entry_id=?"); + public int countEntryId(String entryId) throws SQLException { countEntryId.setString(1, entryId); - try(ResultSet rs = countEntryId.executeQuery()) { + try (ResultSet rs = countEntryId.executeQuery()) { rs.next(); return rs.getInt(1); } } - public void insertChange(String entryId, String text) throws SQLException { - insertChange.setString(1, entryId); - insertChange.setString(2, text); - insertChange.executeUpdate(); + private final PreparedStatement insertPush = prepareStatement("INSERT INTO gitorious_event(project_slug, name, entry_id, published, title, content, event_type, who, \"from\", \"to\", branch, commit_count) VALUES(?, ?, ?, ?, ?, ?, 'PUSH', ?, ?, ?, ?, ?)"); + + public void insertEvent(GitoriousEvent event) throws SQLException { + PreparedStatement s; + if (event instanceof GitoriousPush) { + GitoriousPush push = (GitoriousPush) event; + s = insertPush; + s.setString(7, push.who); + s.setString(8, push.from); + s.setString(9, push.to); + s.setString(10, push.branch); + s.setInt(11, push.commitCount); + } else { + throw new SQLException("Unknown event type: " + event.getClass().getName()); + } + + s.setString(1, event.projectSlug); + s.setString(2, event.name); + s.setString(3, event.entryId); + s.setTimestamp(4, dateToTimestamp.f(event.published)); + s.setString(5, event.title); + s.setString(6, event.content); + s.executeUpdate(); } } diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java index 4cd0916..4ee6322 100644 --- a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java +++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java @@ -5,14 +5,13 @@ import fj.data.*; import static fj.data.Option.*; import io.trygvis.esper.testing.*; import static java.lang.System.*; -import org.apache.abdera.*; -import org.apache.abdera.model.*; import org.apache.abdera.parser.*; import org.codehaus.httpcache4j.*; import org.codehaus.httpcache4j.cache.*; import org.codehaus.httpcache4j.client.*; import java.io.*; +import java.net.*; import java.sql.*; import java.util.Date; import java.util.*; @@ -21,7 +20,7 @@ import java.util.Set; import java.util.concurrent.*; public class GitoriousImporter { - private final Parser parser; + private final GitoriousAtomFeedParser parser; private final BoneCP boneCp; private final GitoriousClient gitoriousClient; private final HTTPCache httpCache; @@ -32,8 +31,7 @@ public class GitoriousImporter { } public GitoriousImporter(String jdbcUrl, String jdbcUsername, String jdbcPassword) throws Exception { - Abdera abdera = new Abdera(); - parser = abdera.getParser(); + parser = new GitoriousAtomFeedParser(); BoneCPConfig config = new BoneCPConfig(); config.setJdbcUrl(jdbcUrl); @@ -48,22 +46,25 @@ public class GitoriousImporter { gitoriousClient = new GitoriousClient(httpCache, "http://gitorious.org"); - final ScheduledThreadPoolExecutor service = new ScheduledThreadPoolExecutor(1); + final ScheduledThreadPoolExecutor service = new ScheduledThreadPoolExecutor(2); + boolean projectsUpdateEnabled = false; int projectsUpdateDelay = 0 * 1000; int projectsUpdateInterval = 60 * 1000; int repositoriesUpdateDelay = 0; int repositoriesUpdateInterval = 60 * 1000; - service.scheduleAtFixedRate(new Runnable() { - public void run() { - try { - discoverProjects(); - } catch (Exception e) { - e.printStackTrace(System.out); + if (projectsUpdateEnabled) { + service.scheduleAtFixedRate(new Runnable() { + public void run() { + try { + discoverProjects(); + } catch (Exception e) { + e.printStackTrace(System.out); + } } - } - }, projectsUpdateDelay, projectsUpdateInterval, TimeUnit.MILLISECONDS); + }, projectsUpdateDelay, projectsUpdateInterval, TimeUnit.MILLISECONDS); + } service.scheduleAtFixedRate(new Runnable() { public void run() { @@ -90,13 +91,15 @@ public class GitoriousImporter { System.out.println("New project: " + project.slug + ", has " + project.repositories.size() + " repositories."); projectDao.insertProject(project.slug); for (GitoriousRepositoryXml repository : project.repositories) { - repoDao.insertRepository(repository.projectSlug, repository.name, gitoriousClient.atomFeed(project.slug)); + URI atomFeed = gitoriousClient.atomFeed(repository.projectSlug, repository.name); + repoDao.insertRepository(repository.projectSlug, repository.name, atomFeed); } } else { for (GitoriousRepositoryXml repository : project.repositories) { if (repoDao.countRepositories(repository.projectSlug, repository.name) == 0) { System.out.println("New repository for project " + repository.projectSlug + ": " + repository.name); - repoDao.insertRepository(repository.projectSlug, repository.name, gitoriousClient.atomFeed(project.slug)); + URI atomFeed = gitoriousClient.atomFeed(repository.projectSlug, repository.name); + repoDao.insertRepository(repository.projectSlug, repository.name, atomFeed); } } @@ -160,7 +163,7 @@ public class GitoriousImporter { GitoriousRepositoryDao repositoryDao = daos.gitoriousRepositoryDao; GitoriousEventDao eventDao = daos.gitoriousEventDao; - Option lastUpdate = repository.lastUpdate; + Option lastUpdate = repository.lastSuccessfulUpdate; System.out.println("Fetching " + repository.atomFeed); @@ -174,9 +177,9 @@ public class GitoriousImporter { System.out.println("responseDate = " + responseDate); - Document document; + List events; try { - document = parser.parse(response.getPayload().getInputStream()); + events = parser.parseStream(response.getPayload().getInputStream(), lastUpdate, repository.projectSlug, repository.name); } catch (ParseException e) { repositoryDao.updateTimestamp(repository.projectSlug, repository.name, new Timestamp(currentTimeMillis()), Option.none()); System.out.println("Error parsing " + repository.atomFeed); @@ -184,28 +187,12 @@ public class GitoriousImporter { return; } - Feed feed = (Feed) document.getRoot(); - - for (Entry entry : feed.getEntries()) { - String entryId = entry.getId().toASCIIString(); - Date published = entry.getPublished(); - String title = entry.getTitle(); - - // Validate element - if (entryId == null || published == null || title == null) { - continue; - } - - if (lastUpdate.isSome() && lastUpdate.some().after(published)) { - System.out.println("Old entry: " + repository.atomFeed + ":" + entryId); - continue; - } - - if (eventDao.countEntryId(entryId) == 0) { - System.out.println("New entry: " + repository.atomFeed + ":" + entryId); - eventDao.insertChange(entryId, title); + for (GitoriousEvent event : events) { + if (eventDao.countEntryId(event.entryId) == 0) { + System.out.println("New entry in " + repository.atomFeed + ": " + event.entryId); + eventDao.insertEvent(event); } else { - System.out.println("Already imported entry: " + entryId); + System.out.println("Already imported entry: " + event.entryId); } } -- cgit v1.2.3