diff options
Diffstat (limited to 'src/main/java/io/trygvis/esper/testing/gitorious')
4 files changed, 466 insertions, 0 deletions
diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java
new file mode 100644
index 0000000..9479faa
--- /dev/null
+++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousClient.java
@@ -0,0 +1,112 @@
+package io.trygvis.esper.testing.gitorious;
+
+import static java.lang.System.*;
+import org.apache.commons.io.*;
+import static org.codehaus.httpcache4j.HTTPMethod.*;
+import org.codehaus.httpcache4j.*;
+import org.codehaus.httpcache4j.cache.*;
+import org.dom4j.*;
+import org.dom4j.io.*;
+
+import javax.xml.stream.*;
+import java.io.*;
+import java.net.*;
+import java.util.*;
+
+/**
+ * Client for the Gitorious HTTP API that reads the paginated {@code projects.xml} listing.
+ */
+public class GitoriousClient {
+    public static final STAXEventReader xmlReader = new STAXEventReader();
+
+    /**
+     * Number of back-to-back unparseable pages tolerated before paging stops. Without a bound, a
+     * server that answers every page with non-XML (for example an error page) would keep
+     * {@link #findProjects()} looping forever.
+     */
+    private static final int MAX_CONSECUTIVE_PARSE_FAILURES = 3;
+
+    private final HTTPCache httpCache;
+    private final String gitoriousUrl;
+    private final String projectsUri;
+
+    /**
+     * @param httpCache    HTTP client used for all requests
+     * @param gitoriousUrl base URL of the Gitorious installation, without a trailing slash
+     * @throws URISyntaxException if {@code gitoriousUrl} is not a valid URI
+     */
+    public GitoriousClient(HTTPCache httpCache, String gitoriousUrl) throws URISyntaxException {
+        this.httpCache = httpCache;
+        this.gitoriousUrl = new URI(gitoriousUrl).toASCIIString();
+        // Derive the listing URI from the normalized URL so both fields agree on the encoding.
+        this.projectsUri = this.gitoriousUrl + "/projects.xml";
+    }
+
+    /**
+     * Fetches every page of the project listing, starting at page 1, until an empty page is
+     * returned.
+     * <p>
+     * A page that cannot be parsed is logged and skipped; paging gives up after
+     * {@value #MAX_CONSECUTIVE_PARSE_FAILURES} such pages in a row.
+     *
+     * @return all projects parsed from the pages that could be read
+     * @throws Exception on HTTP or I/O failures, or when a repository URI cannot be built
+     */
+    public List<GitoriousProject> findProjects() throws Exception {
+        System.out.println("Fetching all projects");
+
+        List<GitoriousProject> all = new ArrayList<>();
+        int consecutiveParseFailures = 0;
+        for (int page = 1; ; page++) {
+            System.out.println("Fetching projects XML, page=" + page);
+            long start = currentTimeMillis();
+            HTTPRequest request = new HTTPRequest(new URI(projectsUri + "?page=" + page), GET);
+            HTTPResponse response = httpCache.execute(request);
+            long end = currentTimeMillis();
+            System.out.println("Fetched XML in " + (end - start) + "ms.");
+
+            byte[] bytes = readPayload(response);
+            try {
+                Document doc = xmlReader.readDocument(new ByteArrayInputStream(bytes));
+
+                List<GitoriousProject> list = GitoriousProject.projectsFromXml(gitoriousUrl, doc.getRootElement());
+
+                // This indicates the last page.
+                if (list.isEmpty()) {
+                    break;
+                }
+
+                System.out.println("Parsed out " + list.size() + " projects.");
+                all.addAll(list);
+                consecutiveParseFailures = 0;
+            } catch (XMLStreamException e) {
+                System.out.println("Unable to parse XML.");
+                // Decode explicitly instead of relying on the platform default charset.
+                System.out.println(new String(bytes, java.nio.charset.StandardCharsets.UTF_8));
+
+                consecutiveParseFailures++;
+                if (consecutiveParseFailures >= MAX_CONSECUTIVE_PARSE_FAILURES) {
+                    System.out.println("Giving up after " + consecutiveParseFailures + " unparseable pages in a row.");
+                    break;
+                }
+            }
+        }
+
+        return all;
+    }
+
+    /**
+     * Reads the whole response body and closes the underlying stream.
+     *
+     * @return the body bytes, or an empty array when the response carries no payload
+     */
+    private static byte[] readPayload(HTTPResponse response) throws IOException {
+        if (response.getPayload() == null) {
+            return new byte[0];
+        }
+
+        try (InputStream in = response.getPayload().getInputStream()) {
+            return IOUtils.toByteArray(in);
+        }
+    }
+}
diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousDao.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousDao.java
new file mode 100644
index 0000000..766a4a9
--- /dev/null
+++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousDao.java
@@ -0,0 +1,68 @@
+package io.trygvis.esper.testing.gitorious;
+
+import java.sql.*;
+
+/**
+ * Data access for the {@code gitorious_change} table.
+ * <p>
+ * Both statements are prepared once in the constructor and reused; call {@link #close()} to
+ * release them. The connection passed to the constructor is never closed here.
+ */
+public class GitoriousDao implements AutoCloseable {
+    private final PreparedStatement countEntryId;
+    private final PreparedStatement insertChange;
+
+    /**
+     * @param c connection to prepare the statements on
+     * @throws SQLException if either statement cannot be prepared
+     */
+    public GitoriousDao(Connection c) throws SQLException {
+        PreparedStatement count = c.prepareStatement("SELECT count(entry_id) FROM gitorious_change WHERE entry_id=?");
+        try {
+            insertChange = c.prepareStatement("INSERT INTO gitorious_change(entry_id, text) VALUES(?, ?)");
+        } catch (SQLException e) {
+            // Do not leak the first statement when preparing the second one fails.
+            try {
+                count.close();
+            } catch (SQLException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+        countEntryId = count;
+    }
+
+    /**
+     * Counts the stored changes with the given entry id.
+     *
+     * @return the number of rows in {@code gitorious_change} whose {@code entry_id} matches
+     */
+    public int countEntryId(String entryId) throws SQLException {
+        countEntryId.setString(1, entryId);
+        try (ResultSet rs = countEntryId.executeQuery()) {
+            // Only read the column when a row is actually available.
+            return rs.next() ? rs.getInt(1) : 0;
+        }
+    }
+
+    /**
+     * Inserts one change row. This method does not commit.
+     */
+    public void insertChange(String entryId, String text) throws SQLException {
+        insertChange.setString(1, entryId);
+        insertChange.setString(2, text);
+        insertChange.executeUpdate();
+    }
+
+    /**
+     * Closes both prepared statements. The second one is closed even if closing the first fails.
+     */
+    @Override
+    public void close() throws SQLException {
+        try {
+            countEntryId.close();
+        } finally {
+            insertChange.close();
+        }
+    }
+}
diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java
new file mode 100644
index 0000000..05dfe43
--- /dev/null
+++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousImporter.java
@@ -0,0 +1,163 @@
+package io.trygvis.esper.testing.gitorious;
+
+import io.trygvis.esper.testing.*;
+import org.apache.abdera.*;
+import org.apache.abdera.model.*;
+import org.apache.abdera.protocol.client.*;
+import org.apache.abdera.protocol.client.cache.*;
+import org.codehaus.httpcache4j.cache.*;
+import org.codehaus.httpcache4j.client.*;
+
+import java.sql.*;
+import java.util.Date;
+import java.util.*;
+
+/**
+ * Polls a Gitorious Atom feed and stores the entries that have not been imported yet.
+ */
+public class GitoriousImporter {
+    private final AbderaClient abderaClient;
+    private final Connection connection;
+    private final AtomDao atomDao;
+    private final GitoriousDao gitoriousDao;
+
+    public GitoriousImporter(AbderaClient abderaClient, Connection c) throws SQLException {
+        this.abderaClient = abderaClient;
+        this.connection = c;
+        atomDao = new AtomDao(c);
+        gitoriousDao = new GitoriousDao(c);
+    }
+
+    /**
+     * Lists all projects of gitorious.org and prints their repositories. The feed import itself
+     * is still disabled (see the commented-out block at the end).
+     */
+    public static void main(String[] args) throws Exception {
+        Main.configureLog4j();
+        Abdera abdera = new Abdera();
+        AbderaClient abderaClient = new AbderaClient(abdera, new LRUCache(abdera, 1000));
+
+        // Close the connection on every exit path, including a failing findProjects().
+        try (Connection connection = DriverManager.getConnection(DbMain.JDBC_URL, "esper", "")) {
+            connection.setAutoCommit(false);
+
+            HTTPCache httpCache = new HTTPCache(new MemoryCacheStorage(), HTTPClientResponseResolver.createMultithreadedInstance());
+
+            GitoriousClient gitoriousClient = new GitoriousClient(httpCache, "https://gitorious.org");
+
+            List<GitoriousProject> projects = gitoriousClient.findProjects();
+
+            System.out.println("projects.size() = " + projects.size());
+            for (GitoriousProject project : projects) {
+                System.out.println("project.repositories = " + project.repositories);
+            }
+
+//            new GitoriousImporter(abderaClient, connection).work();
+//
+//            ScheduledThreadPoolExecutor service = new ScheduledThreadPoolExecutor(1);
+//
+//            new ResourceManager<URL, URL>(Equal.<URL>anyEqual(), service, 1000, new Callable<List<URL>>() {
+//                public List<URL> call() throws Exception {
+//
+//                }
+//            });
+        }
+    }
+
+    /**
+     * Polls the hard-coded Qt feed URL in an endless loop, sleeping ten seconds between rounds.
+     * Each round sends a conditional GET based on the stored feed timestamp, imports the entries
+     * and commits.
+     */
+    private void work() throws SQLException, InterruptedException {
+        String url = "http://qt.gitorious.org/projects/show/qt.atom";
+
+        while (true) {
+            Timestamp lastUpdate = atomDao.getAtomFeed(url);
+
+            System.out.println("Fetching " + url);
+            RequestOptions options = new RequestOptions();
+            if (lastUpdate != null) {
+                options.setIfModifiedSince(lastUpdate);
+            }
+
+            long start = System.currentTimeMillis();
+            ClientResponse response = abderaClient.get(url, options);
+            long end = System.currentTimeMillis();
+            System.out.println("Fetched in " + (end - start) + "ms");
+
+            try {
+                if (response.getStatus() == 304) {
+                    // The conditional GET matched: there is no feed document to parse.
+                    System.out.println("Not modified");
+                } else {
+                    importFeed(url, lastUpdate, response);
+                }
+            } finally {
+                // Release the resources held by the response once it has been processed.
+                response.release();
+            }
+
+            connection.commit();
+
+            System.out.println("Sleeping");
+            Thread.sleep(10 * 1000);
+        }
+    }
+
+    /**
+     * Stores the new entries of one fetched feed and records when the feed was fetched.
+     *
+     * @param url        feed URL, used as the key of the stored timestamp
+     * @param lastUpdate timestamp of the previous fetch, or {@code null} for a feed seen for the
+     *                   first time
+     * @param response   response whose body is the Atom feed
+     */
+    private void importFeed(String url, Timestamp lastUpdate, ClientResponse response) throws SQLException {
+        // Use the server's timestamp
+        Date responseDate = response.getDateHeader("Date");
+
+        System.out.println("responseDate = " + responseDate);
+
+        // Fall back to the local clock when the response has no Date header.
+        long fetchedMillis = responseDate != null ? responseDate.getTime() : System.currentTimeMillis();
+        Timestamp fetchedAt = new Timestamp(fetchedMillis);
+
+        Document<Element> document = response.getDocument();
+        Feed feed = (Feed) document.getRoot();
+
+        for (Entry entry : feed.getEntries()) {
+            // Guard the id before converting it: a missing id must be skipped by the validation
+            // below instead of failing with a NullPointerException.
+            String entryId = entry.getId() == null ? null : entry.getId().toASCIIString();
+            Date published = entry.getPublished();
+            String title = entry.getTitle();
+
+            // Validate element
+            if (entryId == null || published == null || title == null) {
+                continue;
+            }
+
+            if (lastUpdate != null && lastUpdate.after(published)) {
+                System.out.println("Old entry: " + url + ":" + entryId);
+                continue;
+            }
+
+            System.out.println("New entry: " + url + ":" + entryId);
+            if (gitoriousDao.countEntryId(entryId) == 0) {
+                gitoriousDao.insertChange(entryId, title);
+            } else {
+                System.out.println("Already imported entry: " + entryId);
+            }
+        }
+
+        if (lastUpdate == null) {
+            System.out.println("New atom feed");
+            atomDao.insertAtomFeed(url, fetchedAt);
+        } else {
+            System.out.println("Updating atom feed");
+            // Advance the stored timestamp; writing lastUpdate back would leave it unchanged.
+            atomDao.updateAtomFeed(url, fetchedAt);
+        }
+    }
+}
diff --git a/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousProject.java b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousProject.java
new file mode 100644
index 0000000..725b678
--- /dev/null
+++ b/src/main/java/io/trygvis/esper/testing/gitorious/GitoriousProject.java
@@ -0,0 +1,123 @@
+package io.trygvis.esper.testing.gitorious;
+
+import static org.apache.commons.lang.StringUtils.*;
+import org.dom4j.*;
+
+import java.net.*;
+import java.util.*;
+
+/**
+ * A Gitorious project together with its mainline repositories, as parsed from the project
+ * listing XML.
+ */
+public class GitoriousProject {
+    public final String slug;
+    public final List<GitoriousRepository> repositories;
+
+    public GitoriousProject(String slug, List<GitoriousRepository> repositories) {
+        this.slug = slug;
+        this.repositories = repositories;
+    }
+
+    /**
+     * Parses one {@code <project>} element. Repositories without a name are left out.
+     *
+     * @return the project, or {@code null} when the slug, {@code <repositories>} or
+     *         {@code <mainlines>} is missing
+     * @throws URISyntaxException if a repository feed URI cannot be built
+     */
+    public static GitoriousProject fromXml(String gitoriousUrl, Element project) throws URISyntaxException {
+        String slug = trimToNull(project.elementText("slug"));
+
+        if (slug == null) {
+            System.out.println("Missing slug");
+            return null;
+        }
+
+        Element repositories = project.element("repositories");
+        if (repositories == null) {
+            System.out.println("Missing <repositories>");
+            return null;
+        }
+
+        Element mainlines = repositories.element("mainlines");
+        if (mainlines == null) {
+            System.out.println("Missing <mainlines>");
+            return null;
+        }
+
+        // Keep the unchecked-cast suppression on this one declaration only.
+        @SuppressWarnings("unchecked")
+        List<Element> list = (List<Element>) mainlines.elements("repository");
+        List<GitoriousRepository> repositoryList = new ArrayList<>(list.size());
+        for (Element repository : list) {
+            GitoriousRepository r = GitoriousRepository.fromXml(gitoriousUrl, slug, repository);
+
+            if (r != null) {
+                repositoryList.add(r);
+            }
+        }
+
+        return new GitoriousProject(slug, repositoryList);
+    }
+
+    /**
+     * Parses every {@code <project>} child of {@code root}; projects that fail to parse are
+     * printed and skipped.
+     */
+    public static List<GitoriousProject> projectsFromXml(String gitoriousUrl, Element root) throws URISyntaxException {
+        // Keep the unchecked-cast suppression on this one declaration only.
+        @SuppressWarnings("unchecked")
+        List<Element> elements = (List<Element>) root.elements("project");
+
+        List<GitoriousProject> projects = new ArrayList<>(elements.size());
+        for (Element project : elements) {
+            GitoriousProject p = GitoriousProject.fromXml(gitoriousUrl, project);
+            if (p == null) {
+                System.out.println(project.toString());
+                continue;
+            }
+            projects.add(p);
+        }
+
+        return projects;
+    }
+}
+
+/**
+ * A single repository of a project, with the URI of its Atom feed.
+ */
+class GitoriousRepository {
+    public final String project;
+    public final String name;
+    public final URI atom;
+
+    GitoriousRepository(String project, String name, URI atom) {
+        this.project = project;
+        this.name = name;
+        this.atom = atom;
+    }
+
+    /**
+     * Parses one {@code <repository>} element.
+     *
+     * @return the repository, or {@code null} when it has no name
+     */
+    public static GitoriousRepository fromXml(String gitoriousUrl, String project, Element element) throws URISyntaxException {
+        String name = trimToNull(element.elementText("name"));
+
+        if (name == null) {
+            return null;
+        }
+
+        return new GitoriousRepository(project, name, new URI(gitoriousUrl + "/" + project + "/" + name + ".atom"));
+    }
+
+    /**
+     * Readable form for log output; without it a list of repositories prints as object identities.
+     */
+    @Override
+    public String toString() {
+        return "GitoriousRepository{project=" + project + ", name=" + name + ", atom=" + atom + "}";
+    }
+}