Skip to content
This repository has been archived by the owner on Jan 7, 2018. It is now read-only.

Commit

Permalink
Fix logging errors when UTF-8 characters were part of URL.
Browse files Browse the repository at this point in the history
If UTF-8 characters were part of the URL, then the way nginx encodes
those characters in its logs was causing the JSON parsing to fail, which
meant these requests weren't being logged.

Also add integration tests to ensure backslashes are passed through
properly in URL due to node.js bug. Fixed in gatekeeper by:
NREL/api-umbrella-gatekeeper@f338714
  • Loading branch information
GUI committed Mar 7, 2015
1 parent 8fa5fa6 commit 673ca14
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 2 deletions.
28 changes: 28 additions & 0 deletions lib/router_log_listener/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,41 @@ _.extend(Worker.prototype, {
message = message.replace(/\\x22/g, '\\"');
message = message.replace(/\\x5C/g, '\\\\');

// JSON.parse will bomb with the hexidecimal escaped characters that the
// nginx logs use (since these aren't valid according to JSON:
// http://stackoverflow.com/a/17961409/222487). This step replaces these
// hexidecimal sequences with their string equivalent. This approach seems
// a little ridiculous, but I can't really find another way to handle these
// character sequences.
//
// The basic approach is that a url containing "✓" will get logged as
// "\xE2\x9C\x93". This regex matches the entire sequence of hex-encoded
// characters (since they have to be matched together, since "\xE2" just by
// itself is something else). The regex does not match literal backslashes
// like "\\xE2". Once matched, we then strip the "\x" prefixes and then
// pipe it to Node's Buffer class, where we can convert from "E29C93" back
// into "✓". Now that the string is actually in UTF-8 (and no unsupported
// hex escape sequences), we can pass that to JSON.parse.
message = message.replace(/([^\\]|^)((\\x[a-fA-F0-9]{2})+)/g, function(match, p1, p2) {
var hex = p2.replace(/\\x/g, '');
return p1 + (new Buffer(hex, 'hex')).toString();
});

try {
log = JSON.parse(message);
} catch(error) {
logger.error({ err: error, message: message }, 'JSON parse error for log message');
return false;
}

// Workaround for url.parse replacing backslashes with forward slashes:
// https://github.com/joyent/node/pull/8459 This is fixed in Node 0.12, but
// since we're on Node 0.10, sidestep the issue by replacing backslashes
// with their URL-encoded version (which url.parse won't flip).
if(log.req_uri) {
log.req_uri = log.req_uri.replace(/\\/g, '%5C');
}

// Strip values that are just the string '-'. This is used in nginx logs
// to represent empty values, but we don't want to actually log this dash
// character.
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
},
"dependencies": {
"async": "0.9.0",
"api-umbrella-gatekeeper": "0.8.0",
"api-umbrella-gatekeeper": "0.8.1",
"api-umbrella-config": "0.2.1",
"bunyan": "1.2.1",
"bunyan-rollbar": "0.1.0",
Expand Down
63 changes: 63 additions & 0 deletions test/integration/logging.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ require('../test_helper');
var _ = require('lodash'),
apiUmbrellaConfig = require('api-umbrella-config'),
async = require('async'),
Curler = require('curler').Curler,
execFile = require('child_process').execFile,
Factory = require('factory-lady'),
mkdirp = require('mkdirp'),
Expand Down Expand Up @@ -71,6 +72,10 @@ describe('logging', function() {
options = null;
}

if(!uniqueQueryId) {
return done('waitForLog must be passed a uniqueQueryId parameter. Passed: ' + uniqueQueryId);
}

options = options || {};
options.timeout = options.timeout || 8500;
options.minCount = options.minCount || 1;
Expand Down Expand Up @@ -354,6 +359,10 @@ describe('logging', function() {
'Referer': 'http://example.com/"foo\'bar',
'Content-Type': 'text"\x22plain\'\\x22',
},
auth: {
user: '"foo\'bar',
pass: 'bar"foo\'',
},
});

request.get('http://localhost:9080/info/', options, function(error, response) {
Expand All @@ -362,6 +371,7 @@ describe('logging', function() {
should.not.exist(error);
record.request_referer.should.eql('http://example.com/"foo\'bar');
record.request_content_type.should.eql('text""plain\'\\x22');
record.request_basic_auth_username.should.eql('"foo\'bar');
done();
}.bind(this));
}.bind(this));
Expand All @@ -385,6 +395,59 @@ describe('logging', function() {
}.bind(this));
});

it('logs requests with utf8 characters in the URL', function(done) {
this.timeout(4500);

// Use curl and not request for these tests, since the request library
// calls url.parse which has a bug that causes backslashes to become
// forward slashes https://github.com/joyent/node/pull/8459
var curl = new Curler();
curl.request({
method: 'GET',
url: 'http://localhost:9080/info/utf8/✓/encoded_utf8/%E2%9C%93/?api_key=' + this.apiKey + '&unique_query_id=' + this.uniqueQueryId + '&utf8=✓&utf8_url_encoded=%E2%9C%93&more_utf8=¬¶ªþ¤l&more_utf8_hex=\xAC\xB6\xAA\xFE\xA4l&more_utf8_hex_lowercase=\xac\xb6\xaa\xfe\xa4l&actual_backslash_x=\\xAC\\xB6\\xAA\\xFE\\xA4l',
}, function(error, response) {
response.statusCode.should.eql(200);
waitForLog(this.uniqueQueryId, function(error, response, hit, record) {
should.not.exist(error);
record.request_query.utf8.should.eql('✓');
record.request_query.utf8_url_encoded.should.eql('✓');
record.request_query.more_utf8.should.eql('¬¶ªþ¤l');
record.request_query.more_utf8_hex.should.eql('¬¶ªþ¤l');
record.request_query.more_utf8_hex_lowercase.should.eql('¬¶ªþ¤l');
record.request_query.actual_backslash_x.should.eql('\\xAC\\xB6\\xAA\\xFE\\xA4l');
record.request_path.should.eql('/info/utf8/✓/encoded_utf8/%E2%9C%93/');
record.request_url.should.contain(record.request_path);
record.request_url.should.contain('utf8=%E2%9C%93&utf8_url_encoded=%E2%9C%93&more_utf8=%C2%AC%C2%B6%C2%AA%C3%BE%C2%A4l&more_utf8_hex=%C2%AC%C2%B6%C2%AA%C3%BE%C2%A4l&more_utf8_hex_lowercase=%C2%AC%C2%B6%C2%AA%C3%BE%C2%A4l&actual_backslash_x=%5CxAC%5CxB6%5CxAA%5CxFE%5CxA4l');
done();
});
}.bind(this));
});

it('logs requests with backslashes and slashes', function(done) {
this.timeout(4500);

// Use curl and not request for these tests, since the request library
// calls url.parse which has a bug that causes backslashes to become
// forward slashes https://github.com/joyent/node/pull/8459
var curl = new Curler();
curl.request({
method: 'GET',
url: 'http://localhost:9080/info/extra//slash/some\\backslash/encoded%5Cbackslash/encoded%2Fslash?api_key=' + this.apiKey + '&unique_query_id=' + this.uniqueQueryId + '&forward_slash=/slash&encoded_forward_slash=%2F&back_slash=\\&encoded_back_slash=%5C',
}, function(error, response) {
response.statusCode.should.eql(200);
waitForLog(this.uniqueQueryId, function(error, response, hit, record) {
should.not.exist(error);
record.request_query.forward_slash.should.eql('/slash');
record.request_query.encoded_forward_slash.should.eql('/');
record.request_query.back_slash.should.eql('\\');
record.request_query.encoded_back_slash.should.eql('\\');
record.request_path.should.eql('/info/extra//slash/some%5Cbackslash/encoded%5Cbackslash/encoded%2Fslash');
record.request_url.should.contain(record.request_path);
done();
}.bind(this));
}.bind(this));
});

it('logs requests that exceed the nginx-level rate limits', function(done) {
this.timeout(10000);
async.times(100, function(index, callback) {
Expand Down
47 changes: 47 additions & 0 deletions test/integration/proxying.js
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,53 @@ describe('proxying', function() {
});
});

describe('url encoding', function() {
it('passes utf8 characters in the URL', function(done) {
// Use curl and not request for these tests, since the request library
// calls url.parse which has a bug that causes backslashes to become
// forward slashes https://github.com/joyent/node/pull/8459
var curl = new Curler();
curl.request({
method: 'GET',
url: 'http://localhost:9080/info/utf8/✓/encoded_utf8/%E2%9C%93/?api_key=' + this.apiKey + '&unique_query_id=' + this.uniqueQueryId + '&utf8=✓&utf8_url_encoded=%E2%9C%93&more_utf8=¬¶ªþ¤l&more_utf8_hex=\xAC\xB6\xAA\xFE\xA4l&more_utf8_hex_lowercase=\xac\xb6\xaa\xfe\xa4l&actual_backslash_x=\\xAC\\xB6\\xAA\\xFE\\xA4l',
}, function(error, response, body) {
response.statusCode.should.eql(200);
var data = JSON.parse(body);
data.url.query.utf8.should.eql('✓');
data.url.query.utf8_url_encoded.should.eql('✓');
data.url.query.more_utf8.should.eql('¬¶ªþ¤l');
data.url.query.more_utf8_hex.should.eql('¬¶ªþ¤l');
data.url.query.more_utf8_hex_lowercase.should.eql('¬¶ªþ¤l');
data.url.query.actual_backslash_x.should.eql('\\xAC\\xB6\\xAA\\xFE\\xA4l');
data.url.pathname.should.eql('/info/utf8/✓/encoded_utf8/%E2%9C%93/');
data.raw_url.should.contain(data.url.pathname);
data.raw_url.should.contain('utf8=%E2%9C%93&utf8_url_encoded=%E2%9C%93&more_utf8=%C2%AC%C2%B6%C2%AA%C3%BE%C2%A4l&more_utf8_hex=%C2%AC%C2%B6%C2%AA%C3%BE%C2%A4l&more_utf8_hex_lowercase=%C2%AC%C2%B6%C2%AA%C3%BE%C2%A4l&actual_backslash_x=%5CxAC%5CxB6%5CxAA%5CxFE%5CxA4l');
done();
});
});

it('passes backslashes and slashes in the URL', function(done) {
// Use curl and not request for these tests, since the request library
// calls url.parse which has a bug that causes backslashes to become
// forward slashes https://github.com/joyent/node/pull/8459
var curl = new Curler();
curl.request({
method: 'GET',
url: 'http://localhost:9080/info/extra//slash/some\\backslash/encoded%5Cbackslash/encoded%2Fslash?api_key=' + this.apiKey + '&unique_query_id=' + this.uniqueQueryId + '&forward_slash=/slash&encoded_forward_slash=%2F&back_slash=\\&encoded_back_slash=%5C',
}, function(error, response, body) {
response.statusCode.should.eql(200);
var data = JSON.parse(body);
data.url.query.forward_slash.should.eql('/slash');
data.url.query.encoded_forward_slash.should.eql('/');
data.url.query.back_slash.should.eql('\\');
data.url.query.encoded_back_slash.should.eql('\\');
data.url.pathname.should.eql('/info/extra//slash/some%5Cbackslash/encoded%5Cbackslash/encoded%2Fslash');
data.raw_url.should.contain(data.url.pathname);
done();
});
});
});

describe('timeouts', function() {
it('times out quickly if a backend is down', function(done) {
this.timeout(500);
Expand Down
4 changes: 3 additions & 1 deletion test/support/example_backend_app.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ app.use(multer({
}));

app.all('/info/*', function(req, res) {
var rawUrl = req.protocol + '://' + req.hostname + req.url;
res.set('X-Received-Method', req.method);
res.json({
method: req.method,
headers: req.headers,
local_interface_ip: req.socket.localAddress,
url: url.parse(req.protocol + '://' + req.hostname + req.url, true),
raw_url: rawUrl,
url: url.parse(rawUrl, true),
});
});

Expand Down

0 comments on commit 673ca14

Please sign in to comment.