From d6a532c420a93b211a9747c5fb807a3f2767fa22 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Thu, 8 Nov 2012 13:20:47 +0100 Subject: o Slurping down the entire list of projects. --- .../io/trygvis/esper/testing/GitoriousDao.java | 27 ----- .../trygvis/esper/testing/GitoriousImporter.java | 98 ----------------- .../io/trygvis/esper/testing/ResourceManager.java | 31 ++++++ .../esper/testing/gitorious/GitoriousClient.java | 64 +++++++++++ .../esper/testing/gitorious/GitoriousDao.java | 27 +++++ .../esper/testing/gitorious/GitoriousImporter.java | 120 +++++++++++++++++++++ .../esper/testing/gitorious/GitoriousProject.java | 89 +++++++++++++++ 7 files changed, 331 insertions(+), 125 deletions(-) delete mode 100644 src/main/java/io/trygvis/esper/testing/GitoriousDao.java delete mode 100644 src/main/java/io/trygvis/esper/testing/GitoriousImporter.java create mode 100644 src/main/java/io/trygvis/esper/testing/ResourceManager.java create mode 100644 src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java create mode 100644 src/main/java/io/trygvis/esper/testing/gitorious/GitoriousDao.java create mode 100644 src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java create mode 100644 src/main/java/io/trygvis/esper/testing/gitorious/GitoriousProject.java (limited to 'src/main/java/io/trygvis') diff --git a/src/main/java/io/trygvis/esper/testing/GitoriousDao.java b/src/main/java/io/trygvis/esper/testing/GitoriousDao.java deleted file mode 100644 index bf5d954..0000000 --- a/src/main/java/io/trygvis/esper/testing/GitoriousDao.java +++ /dev/null @@ -1,27 +0,0 @@ -package io.trygvis.esper.testing; - -import java.sql.*; - -public class GitoriousDao { - private final PreparedStatement countEntryId; - private final PreparedStatement insertChange; - - public GitoriousDao(Connection c) throws SQLException { - countEntryId = c.prepareStatement("SELECT count(entry_id) FROM gitorious_change WHERE entry_id=?"); - insertChange = c.prepareStatement("INSERT INTO gitorious_change(entry_id, text) VALUES(?, ?)"); - } - - public int countEntryId(String entryId) throws SQLException { - countEntryId.setString(1, entryId); - try(ResultSet rs = countEntryId.executeQuery()) { - rs.next(); - return rs.getInt(1); - } - } - - public void insertChange(String entryId, String text) throws SQLException { - insertChange.setString(1, entryId); - insertChange.setString(2, text); - insertChange.executeUpdate(); - } -} diff --git a/src/main/java/io/trygvis/esper/testing/GitoriousImporter.java b/src/main/java/io/trygvis/esper/testing/GitoriousImporter.java deleted file mode 100644 index c79d4f5..0000000 --- a/src/main/java/io/trygvis/esper/testing/GitoriousImporter.java +++ /dev/null @@ -1,98 +0,0 @@ -package io.trygvis.esper.testing; - -import org.apache.abdera.*; -import org.apache.abdera.model.*; -import org.apache.abdera.protocol.client.*; -import org.apache.abdera.protocol.client.cache.*; - -import java.sql.*; -import java.util.Date; - -public class GitoriousImporter { - private final AbderaClient abderaClient; - private final Connection connection; - private final AtomDao atomDao; - private final GitoriousDao gitoriousDao; - - public GitoriousImporter(AbderaClient abderaClient, Connection c) throws SQLException { - this.abderaClient = abderaClient; - this.connection = c; - atomDao = new AtomDao(c); - gitoriousDao = new GitoriousDao(c); - } - - public static void main(String[] args) throws InterruptedException, SQLException { - Main.configureLog4j(); - Abdera abdera = new Abdera(); - AbderaClient abderaClient = new AbderaClient(abdera, new LRUCache(abdera, 1000)); - - Connection connection = DriverManager.getConnection(DbMain.JDBC_URL, "esper", ""); - connection.setAutoCommit(false); - - new GitoriousImporter(abderaClient, connection).work(); - } - - private void work() throws SQLException, InterruptedException { - String url = "http://qt.gitorious.org/projects/show/qt.atom"; - - while (true) { - Timestamp lastUpdate = atomDao.getAtomFeed(url); - - System.out.println("Fetching " + url); - RequestOptions options = new RequestOptions(); - if(lastUpdate != null) { - options.setIfModifiedSince(lastUpdate); - } - - long start = System.currentTimeMillis(); - ClientResponse response = abderaClient.get(url, options); - long end = System.currentTimeMillis(); - System.out.println("Fetched in " + (end - start) + "ms"); - - // Use the server's timestamp - Date responseDate = response.getDateHeader("Date"); - - System.out.println("responseDate = " + responseDate); - - Document document = response.getDocument(); - Feed feed = (Feed) document.getRoot(); - - for (Entry entry : feed.getEntries()) { - String entryId = entry.getId().toASCIIString(); - Date published = entry.getPublished(); - String title = entry.getTitle(); - - // Validate element - if (entryId == null || published == null || title == null) { - continue; - } - - if (lastUpdate != null && lastUpdate.after(published)) { - System.out.println("Old entry: " + url + ":" + entryId); - continue; - } - - System.out.println("New entry: " + url + ":" + entryId); - if(gitoriousDao.countEntryId(entryId) == 0) { - gitoriousDao.insertChange(entryId, title); - } - else { - System.out.println("Already imported entry: " + entryId); - } - } - - if (lastUpdate == null) { - System.out.println("New atom feed"); - atomDao.insertAtomFeed(url, new Timestamp(responseDate.getTime())); - } else { - System.out.println("Updating atom feed"); - atomDao.updateAtomFeed(url, lastUpdate); - } - - connection.commit(); - - System.out.println("Sleeping"); - Thread.sleep(10 * 1000); - } - } -} diff --git a/src/main/java/io/trygvis/esper/testing/ResourceManager.java b/src/main/java/io/trygvis/esper/testing/ResourceManager.java new file mode 100644 index 0000000..e9a0068 --- /dev/null +++ b/src/main/java/io/trygvis/esper/testing/ResourceManager.java @@ -0,0 +1,31 @@ +package io.trygvis.esper.testing; + +import fj.*; + +import java.util.*; +import java.util.concurrent.*; + +public class ResourceManager { + private final Equal equal; + private final Callable> discoverer; + private Map map = Collections.emptyMap(); + + public ResourceManager(Equal equal, ScheduledExecutorService executorService, int delay, Callable> discoverer) { + this.equal = equal; + this.discoverer = discoverer; + + executorService.scheduleWithFixedDelay(new Runnable() { + public void run() { + work(); + } + }, delay, delay, TimeUnit.MILLISECONDS); + } + + private void work() { + try { + List keys = discoverer.call(); + } catch (Exception e) { + return; + } + } +} diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java new file mode 100644 index 0000000..9479faa --- /dev/null +++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java @@ -0,0 +1,64 @@ +package io.trygvis.esper.testing.gitorious; + +import static java.lang.System.*; +import org.apache.commons.io.*; +import static org.codehaus.httpcache4j.HTTPMethod.*; +import org.codehaus.httpcache4j.*; +import org.codehaus.httpcache4j.cache.*; +import org.dom4j.*; +import org.dom4j.io.*; + +import javax.xml.stream.*; +import java.io.*; +import java.net.*; +import java.util.*; + +public class GitoriousClient { + public static final STAXEventReader xmlReader = new STAXEventReader(); + private final HTTPCache httpCache; + private final String gitoriousUrl; + private final String projectsUri; + + public GitoriousClient(HTTPCache httpCache, String gitoriousUrl) throws URISyntaxException { + this.httpCache = httpCache; + this.gitoriousUrl = new URI(gitoriousUrl).toASCIIString(); + this.projectsUri = gitoriousUrl + "/projects.xml"; + } + + public List findProjects() throws Exception { + System.out.println("Fetching all projects"); + int page = 1; + + List all = new ArrayList<>(); + while (true) { + System.out.println("Fetching projects XML, page=" + page); + long start = currentTimeMillis(); + HTTPRequest request = new HTTPRequest(new URI(projectsUri + "?page=" + page), GET); + HTTPResponse response = httpCache.execute(request); + long end = currentTimeMillis(); + System.out.println("Fetched XML in " + (end - start) + "ms."); + + byte[] bytes = IOUtils.toByteArray(response.getPayload().getInputStream()); + try { + Document doc = xmlReader.readDocument(new ByteArrayInputStream(bytes)); + + List list = GitoriousProject.projectsFromXml(gitoriousUrl, doc.getRootElement()); + + // This indicates the last page. + if (list.size() == 0) { + break; + } + + System.out.println("Parsed out " + list.size() + " projects."); + all.addAll(list); + } catch (XMLStreamException e) { + System.out.println("Unable to parse XML."); + System.out.println(new String(bytes)); + } + + page++; + } + + return all; + } +} diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousDao.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousDao.java new file mode 100644 index 0000000..766a4a9 --- /dev/null +++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousDao.java @@ -0,0 +1,27 @@ +package io.trygvis.esper.testing.gitorious; + +import java.sql.*; + +public class GitoriousDao { + private final PreparedStatement countEntryId; + private final PreparedStatement insertChange; + + public GitoriousDao(Connection c) throws SQLException { + countEntryId = c.prepareStatement("SELECT count(entry_id) FROM gitorious_change WHERE entry_id=?"); + insertChange = c.prepareStatement("INSERT INTO gitorious_change(entry_id, text) VALUES(?, ?)"); + } + + public int countEntryId(String entryId) throws SQLException { + countEntryId.setString(1, entryId); + try(ResultSet rs = countEntryId.executeQuery()) { + rs.next(); + return rs.getInt(1); + } + } + + public void insertChange(String entryId, String text) throws SQLException { + insertChange.setString(1, entryId); + insertChange.setString(2, text); + insertChange.executeUpdate(); + } +} diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java new file mode 100644 index 0000000..05dfe43 --- /dev/null +++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java @@ -0,0 +1,120 @@ +package io.trygvis.esper.testing.gitorious; + +import io.trygvis.esper.testing.*; +import org.apache.abdera.*; +import org.apache.abdera.model.*; +import org.apache.abdera.protocol.client.*; +import org.apache.abdera.protocol.client.cache.*; +import org.codehaus.httpcache4j.cache.*; +import org.codehaus.httpcache4j.client.*; + +import java.sql.*; +import java.util.Date; +import java.util.*; + +public class GitoriousImporter { + private final AbderaClient abderaClient; + private final Connection connection; + private final AtomDao atomDao; + private final GitoriousDao gitoriousDao; + + public GitoriousImporter(AbderaClient abderaClient, Connection c) throws SQLException { + this.abderaClient = abderaClient; + this.connection = c; + atomDao = new AtomDao(c); + gitoriousDao = new GitoriousDao(c); + } + + public static void main(String[] args) throws Exception { + Main.configureLog4j(); + Abdera abdera = new Abdera(); + AbderaClient abderaClient = new AbderaClient(abdera, new LRUCache(abdera, 1000)); + + Connection connection = DriverManager.getConnection(DbMain.JDBC_URL, "esper", ""); + connection.setAutoCommit(false); + + HTTPCache httpCache = new HTTPCache(new MemoryCacheStorage(), HTTPClientResponseResolver.createMultithreadedInstance()); + + GitoriousClient gitoriousClient = new GitoriousClient(httpCache, "https://gitorious.org"); + + List projects = gitoriousClient.findProjects(); + + System.out.println("projects.size() = " + projects.size()); + for (GitoriousProject project : projects) { + System.out.println("project.repositories = " + project.repositories); + } + +// new GitoriousImporter(abderaClient, connection).work(); +// +// ScheduledThreadPoolExecutor service = new ScheduledThreadPoolExecutor(1); +// +// new ResourceManager(Equal.anyEqual(), service, 1000, new Callable>() { +// public List call() throws Exception { +// +// } +// }); + } + + private void work() throws SQLException, InterruptedException { + String url = "http://qt.gitorious.org/projects/show/qt.atom"; + + while (true) { + Timestamp lastUpdate = atomDao.getAtomFeed(url); + + System.out.println("Fetching " + url); + RequestOptions options = new RequestOptions(); + if (lastUpdate != null) { + options.setIfModifiedSince(lastUpdate); + } + + long start = System.currentTimeMillis(); + ClientResponse response = abderaClient.get(url, options); + long end = System.currentTimeMillis(); + System.out.println("Fetched in " + (end - start) + "ms"); + + // Use the server's timestamp + Date responseDate = response.getDateHeader("Date"); + + System.out.println("responseDate = " + responseDate); + + Document document = response.getDocument(); + Feed feed = (Feed) document.getRoot(); + + for (Entry entry : feed.getEntries()) { + String entryId = entry.getId().toASCIIString(); + Date published = entry.getPublished(); + String title = entry.getTitle(); + + // Validate element + if (entryId == null || published == null || title == null) { + continue; + } + + if (lastUpdate != null && lastUpdate.after(published)) { + System.out.println("Old entry: " + url + ":" + entryId); + continue; + } + + System.out.println("New entry: " + url + ":" + entryId); + if (gitoriousDao.countEntryId(entryId) == 0) { + gitoriousDao.insertChange(entryId, title); + } else { + System.out.println("Already imported entry: " + entryId); + } + } + + if (lastUpdate == null) { + System.out.println("New atom feed"); + atomDao.insertAtomFeed(url, new Timestamp(responseDate.getTime())); + } else { + System.out.println("Updating atom feed"); + atomDao.updateAtomFeed(url, lastUpdate); + } + + connection.commit(); + + System.out.println("Sleeping"); + Thread.sleep(10 * 1000); + } + } +} diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousProject.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousProject.java new file mode 100644 index 0000000..725b678 --- /dev/null +++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousProject.java @@ -0,0 +1,89 @@ +package io.trygvis.esper.testing.gitorious; + +import static org.apache.commons.lang.StringUtils.*; +import org.dom4j.*; + +import java.net.*; +import java.util.*; + +public class GitoriousProject { + public final String slug; + public final List repositories; + + public GitoriousProject(String slug, List repositories) { + this.slug = slug; + this.repositories = repositories; + } + + public static GitoriousProject fromXml(String gitoriousUrl, Element project) throws URISyntaxException { + String slug = trimToNull(project.elementText("slug")); + + if (slug == null) { + System.out.println("Missing slug"); + return null; + } + + Element repositories = project.element("repositories"); + if (repositories == null) { + System.out.println("Missing "); + return null; + } + + Element mainlines = repositories.element("mainlines"); + if (mainlines == null) { + System.out.println("Missing "); + return null; + } + + List list = (List) mainlines.elements("repository"); + List repositoryList = new ArrayList<>(list.size()); + for (Element repository : list) { + GitoriousRepository r = GitoriousRepository.fromXml(gitoriousUrl, slug, repository); + + if (r == null) { + continue; + } + + repositoryList.add(r); + } + + return new GitoriousProject(slug, repositoryList); + } + + public static List projectsFromXml(String gitoriousUrl, Element root) throws URISyntaxException { + List projects = new ArrayList<>(); + for (Element project : (List) root.elements("project")) { + + GitoriousProject p = GitoriousProject.fromXml(gitoriousUrl, project); + if (p == null) { + System.out.println(project.toString()); + continue; + } + projects.add(p); + } + + return projects; + } +} + +class GitoriousRepository { + public final String project; + public final String name; + public final URI atom; + + GitoriousRepository(String project, String name, URI atom) { + this.project = project; + this.name = name; + this.atom = atom; + } + + public static GitoriousRepository fromXml(String gitoriousUrl, String project, Element element) throws URISyntaxException { + String name = trimToNull(element.elementText("name")); + + if (name == null) { + return null; + } + + return new GitoriousRepository(project, name, new URI(gitoriousUrl + "/" + project + "/" + name + ".atom")); + } +} -- cgit v1.2.3