Skip to content

Commit

Permalink
Implement memento link headers and timemap #11
Browse files Browse the repository at this point in the history
  • Loading branch information
ato committed Feb 21, 2019
1 parent 07de615 commit a43d498
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/org/netpreserve/jwarc/HttpRequest.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ void serializeHeaderTo(Appendable output) throws IOException {

/**
 * Parses an HTTP request from {@code channel} using a newly allocated 8192-byte buffer.
 *
 * @param channel the channel to read the request from
 * @return the result of {@code parse(channel, buffer)}
 * @throws IOException declared by this method; presumably raised while reading the channel
 */
public static HttpRequest parse(ReadableByteChannel channel) throws IOException {
    ByteBuffer scratch = ByteBuffer.allocate(8192);
    // Flipping a fresh buffer leaves position and limit both at 0, so it
    // holds no readable bytes when handed to the two-argument overload.
    scratch.flip();
    return parse(channel, scratch);
}

static HttpRequest parse(ReadableByteChannel channel, ByteBuffer buffer) throws IOException {
buffer.flip();
ParseHandler handler = new ParseHandler();
HttpParser parser = new HttpParser(handler);
parser.requestOnly();
Expand Down
1 change: 1 addition & 0 deletions src/org/netpreserve/jwarc/HttpServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ void listen() throws IOException {
*/
private void interact(Socket socket, String prefix) {
ByteBuffer buffer = ByteBuffer.allocate(8192);
buffer.flip();
try {
while (!socket.isInputShutdown()) {
HttpRequest request;
Expand Down
85 changes: 63 additions & 22 deletions src/org/netpreserve/jwarc/WarcServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@
*/
class WarcServer {
private static final DateTimeFormatter ARC_DATE = DateTimeFormatter.ofPattern("yyyyMMddHHmmss").withZone(UTC);
private static final DateTimeFormatter RFC_1123_UTC = RFC_1123_DATE_TIME.withZone(UTC);
private static final MediaType HTML = MediaType.parse("text/html");
private static final MediaType LINK_FORMAT = MediaType.parse("application/link-format");
private static final Pattern REPLAY_RE = Pattern.compile("/replay/([0-9]{14})/(.*)");

private final HttpServer httpServer;
private final Index index = new Index();
private byte[] script = "<!doctype html><script src='/__jwarc__/inject.js'></script>".getBytes(US_ASCII);
private byte[] script = "<!doctype html><script src='/__jwarc__/inject.js'></script>\n".getBytes(US_ASCII);
private Entry entrypoint;

WarcServer(ServerSocket serverSocket, List<Path> warcs) throws IOException {
Expand Down Expand Up @@ -93,27 +95,47 @@ private void handle(Socket socket, String target, HttpRequest request) throws Ex
.body(HTML, script)
.setHeader("Connection", "close")
.build());
return;
}
Matcher m = REPLAY_RE.matcher(target);
if (!m.matches()) {
error(socket, 404, "Malformed replay url");
return;
}
Instant date = Instant.from(ARC_DATE.parse(m.group(1)));
replay(socket, m.group(2), date, true);
replay(socket, m.group(2), date, false);
} else if (target.startsWith("/timemap/")) {
URI uri = URI.create(target.substring("/timemap/".length()));
NavigableSet<Entry> versions = index.query(uri);
if (versions.isEmpty()) {
error(socket, 404, "Not found in archive");
return;
}
StringBuilder sb = new StringBuilder();
sb.append("<").append(versions.first().uri).append(">;rel=\"original\"");
for (Entry entry : versions) {
sb.append(",\n</replay/").append(ARC_DATE.format(entry.date)).append("/").append(entry.uri)
.append(">;rel=\"memento\",datetime=\"").append(RFC_1123_UTC.format(entry.date) + "\"");
}
sb.append("\n");
send(socket, new HttpResponse.Builder(200, "OK")
.body(LINK_FORMAT, sb.toString().getBytes(UTF_8)).build());
} else {
Instant date = request.headers().first("Accept-Datetime")
.map(s -> Instant.from(RFC_1123_DATE_TIME.parse(s)))
.map(s -> Instant.from(RFC_1123_UTC.parse(s)))
.orElse(Instant.EPOCH);
replay(socket, target, date, false);
replay(socket, target, date, true);
}
}

private void replay(Socket socket, String target, Instant date, boolean inject) throws IOException {
Entry entry = index.closest(URI.create(target), date);
if (entry == null) {
private void replay(Socket socket, String target, Instant date, boolean proxy) throws IOException {
URI uri = URI.create(target);
NavigableSet<Entry> versions = index.query(uri);
if (versions.isEmpty()) {
error(socket, 404, "Not found in archive");
return;
}
Entry entry = closest(versions, uri, date);
try (FileChannel channel = FileChannel.open(entry.file, READ)) {
channel.position(entry.position);
WarcReader reader = new WarcReader(channel);
Expand All @@ -129,16 +151,36 @@ private void replay(Socket socket, String target, Instant date, boolean inject)
}
}
b.setHeader("Connection", "keep-alive");
b.setHeader("Memento-Datetime", RFC_1123_DATE_TIME.format(record.date().atOffset(UTC)));
b.setHeader("Memento-Datetime", RFC_1123_UTC.format(record.date()));
if (!proxy) b.setHeader("Link", mementoLinks(versions, entry));
if (proxy) b.setHeader("Vary", "Accept-Datetime");
MessageBody body = http.body();
if (inject && HTML.equals(http.contentType().base())) {
if (!proxy && HTML.equals(http.contentType().base())) {
body = LengthedBody.create(body, ByteBuffer.wrap(script), script.length + body.size());
}
b.body(http.contentType(), body, body.size());
send(socket, b.build());
}
}

/**
 * Builds a Link header value describing {@code current} and its neighbouring captures.
 *
 * <p>The value starts with a {@code rel="original"} link and a {@code rel="timemap"}
 * link for {@code current.uri}. The first, previous, next and last entries of
 * {@code versions} are then each offered to {@code mementoLink}, in that order.
 *
 * @param versions the captures to pick the first/prev/next/last links from;
 *                 {@code first()} and {@code last()} are called on it unconditionally
 * @param current  the capture being served
 * @return the assembled link list as a single string
 */
private String mementoLinks(NavigableSet<Entry> versions, Entry current) {
    StringBuilder links = new StringBuilder();

    links.append("<");
    links.append(current.uri);
    links.append(">;rel=\"original\",");

    links.append("</timemap/");
    links.append(current.uri);
    links.append(">;rel=\"timemap\";type=\"");
    links.append(LINK_FORMAT);
    links.append('"');

    Entry first = versions.first();
    mementoLink(links, "first ", current, first);

    Entry previous = versions.lower(current);
    mementoLink(links, "prev ", current, previous);

    Entry next = versions.higher(current);
    mementoLink(links, "next ", current, next);

    Entry last = versions.last();
    mementoLink(links, "last ", current, last);

    return links.toString();
}

/**
 * Appends one {@code </replay/...>} memento link for {@code entry} to {@code sb}.
 *
 * <p>Nothing is appended when {@code entry} is {@code null} or has the same date as
 * {@code current}. A comma separator is written first if {@code sb} already has content.
 *
 * @param sb      the builder receiving the link
 * @param rel     prefix placed directly before {@code memento} in the rel attribute;
 *                callers in this file pass values with a trailing space such as {@code "first "}
 * @param current the capture being served, used only for the date comparison
 * @param entry   the capture to link to, may be {@code null}
 */
private void mementoLink(StringBuilder sb, String rel, Entry current, Entry entry) {
    if (entry == null) {
        return;
    }
    if (entry.date.equals(current.date)) {
        return;
    }
    if (sb.length() > 0) {
        sb.append(',');
    }
    // Link target: /replay/<14-digit timestamp>/<uri>
    sb.append("</replay/");
    sb.append(ARC_DATE.format(entry.date));
    sb.append("/");
    sb.append(entry.uri);
    // Link attributes: rel and datetime
    sb.append(">;rel=\"");
    sb.append(rel);
    sb.append("memento\";datetime=\"");
    sb.append(RFC_1123_UTC.format(entry.date));
    sb.append("\"");
}

private void error(Socket socket, int status, String reason) throws IOException {
send(socket, new HttpResponse.Builder(status, reason)
.body(HTML, reason.getBytes(UTF_8))
Expand All @@ -157,26 +199,25 @@ private void serve(Socket socket, String resource) throws IOException {
}
}

/**
 * Selects the entry in {@code versions} whose date is nearest to {@code date}.
 *
 * <p>A probe entry built from {@code uri} and {@code date} is used to find the
 * neighbour at or before the probe ({@code floor}) and the neighbour strictly after
 * it ({@code higher}). If only one neighbour exists it is returned. Otherwise the
 * one with the smaller absolute time distance wins, and a tie goes to the later entry.
 *
 * @param versions candidate entries; assumed to be ordered by date for this
 *                 {@code uri} (see the index comparator) — confirm against callers
 * @param uri      the URI used to build the probe entry
 * @param date     the requested point in time
 * @return the nearest entry, or {@code null} if {@code versions} has no neighbours at all
 */
private Entry closest(NavigableSet<Entry> versions, URI uri, Instant date) {
    Entry key = new Entry(uri, date);
    Entry a = versions.floor(key);
    Entry b = versions.higher(key);
    if (a == null) {
        return b;
    }
    if (b == null) {
        return a;
    }
    // Duration.between(b.date, date) is negative because b lies after date, so
    // comparing the signed values could never pick a. Compare magnitudes instead.
    Duration da = Duration.between(a.date, date).abs();
    Duration db = Duration.between(b.date, date).abs();
    return da.compareTo(db) < 0 ? a : b;
}

private static class Index {
NavigableSet<Entry> entries = new TreeSet<>(comparing((Entry e) -> e.urikey).thenComparing(e -> e.date));

void add(Entry entry) {
entries.add(entry);
}

Entry closest(URI uri, Instant date) {
NavigableSet<Entry> versions = versions(uri);
Entry key = new Entry(uri, date);
Entry a = versions.floor(key);
Entry b = versions.higher(key);
if (a == null) return b;
if (b == null) return a;
Duration da = Duration.between(a.date, date);
Duration db = Duration.between(b.date, date);
return da.compareTo(db) < 0 ? a : b;
}

NavigableSet<Entry> versions(URI uri) {
NavigableSet<Entry> query(URI uri) {
return entries.subSet(new Entry(uri, Instant.MIN), true, new Entry(uri, Instant.MAX), true);
}
}
Expand Down

0 comments on commit a43d498

Please sign in to comment.