Skip to content

Commit

Permalink
Strip BOM when reading UTF-8-encoded files
Browse files Browse the repository at this point in the history
  • Loading branch information
mathiasbynens committed May 24, 2014
1 parent abd2fe4 commit 4c1fc02
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 12 deletions.
5 changes: 3 additions & 2 deletions lib/src/bufferFile.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
var fs = require('graceful-fs');
var stripBom = require('strip-bom');

// Rendered diff of lib/src/bufferFile.js: removed and added lines are shown
// together here, so this span is not a runnable file as-is.
//
// Exported loader: reads file.path with fs.readFile, stores the resulting
// bytes on file.contents, then hands the read error (if any) and the file
// object to cb.
module.exports = function (file, cb) {
fs.readFile(file.path, function (err, data) {
// Contents are only assigned when the read actually produced data.
if (data) {
// Appears to be the pre-change line: the raw bytes are stored untouched.
file.contents = data;
// Appears to be the post-change line: the bytes go through stripBom first
// (presumably removing a leading UTF-8 BOM, per the commit title).
file.contents = stripBom(data);
}
// cb is invoked with err and file whether or not contents were assigned.
cb(err, file);
});
};
// The repeated closing line below comes from the diff rendering (one side of
// a changed final line), not from a second enclosing scope.
};
4 changes: 3 additions & 1 deletion lib/src/streamFile.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
var fs = require('graceful-fs');
var stripBom = require('strip-bom');

// Rendered diff of lib/src/streamFile.js: the removed and the added
// assignment to file.contents are both shown below.
//
// Exported loader: sets file.contents to a stream over file.path and calls
// cb(null, file) right away, without waiting for any data to be read.
module.exports = function (file, cb) {
// Appears to be the pre-change line: a plain read stream of the file.
file.contents = fs.createReadStream(file.path);
// Appears to be the post-change version: the read stream is piped into
// stripBom.stream(), and file.contents is whatever .pipe(...) returns.
// NOTE(review): .pipe() does not forward 'error' events from the source
// stream - confirm that consumers of file.contents handle read failures.
file.contents = fs.createReadStream(file.path)
.pipe(stripBom.stream());
cb(null, file);
};
17 changes: 9 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,23 @@
"author": "Fractal <contact@wearefractal.com> (http://wearefractal.com/)",
"main": "./index.js",
"dependencies": {
"vinyl": "^0.2.0",
"glob-stream": "^3.1.5",
"glob-watcher": "^0.0.6",
"mkdirp": "^0.3.5",
"graceful-fs": "^2.0.1",
"map-stream": "^0.1.0"
"map-stream": "^0.1.0",
"mkdirp": "^0.3.5",
"strip-bom": "^0.3.0",
"vinyl": "^0.2.0"
},
"devDependencies": {
"mocha": "^1.17.0",
"should": "^3.0.1",
"mocha-lcov-reporter": "^0.0.1",
"buffer-equal": "0.0.0",
"coveralls": "^2.6.1",
"istanbul": "^0.2.3",
"rimraf": "^2.2.5",
"jshint": "^2.4.1",
"buffer-equal": "0.0.0",
"mocha": "^1.17.0",
"mocha-lcov-reporter": "^0.0.1",
"rimraf": "^2.2.5",
"should": "^3.0.1",
"through2": "^0.4.0"
},
"scripts": {
Expand Down
Binary file added test/fixtures/bom-utf16be.txt
Binary file not shown.
Binary file added test/fixtures/bom-utf16le.txt
Binary file not shown.
1 change: 1 addition & 0 deletions test/fixtures/bom-utf8.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This file is saved as UTF-8 with BOM. 𝌆
68 changes: 67 additions & 1 deletion test/src.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,72 @@ describe('source stream', function() {
stream.end();
});

// Reads the UTF-8 fixture through vfs.src and checks that the single emitted
// file carries the fixture's bytes minus the leading BOM.
it('should strip BOM from UTF-8-encoded files', function(done) {
  var fixturePath = path.join(__dirname, "./fixtures/bom-utf8.txt");
  var rawBytes = fs.readFileSync(fixturePath);
  // U+FEFF takes up 3 bytes in UTF-8: http://mothereff.in/utf-8#%EF%BB%BF
  var bytesWithoutBom = rawBytes.slice(3);
  var collected = [];

  // Runs once the collecting stream is flushed: exactly one file is expected.
  function verify() {
    var emitted;
    collected.length.should.equal(1);
    emitted = collected[0];
    should.exist(emitted.stat);
    emitted.path.should.equal(fixturePath);
    emitted.isBuffer().should.equal(true);
    bufEqual(emitted.contents, bytesWithoutBom).should.equal(true);
    done();
  }

  var stream = vfs.src("./fixtures/bom-utf8.txt", {cwd: __dirname});

  var collect = collected.push.bind(collected);
  bufferStream = through.obj(dataWrap(collect), verify);
  stream.pipe(bufferStream);
});

// Reads the UTF-16-BE fixture through vfs.src and checks that the single
// emitted file carries exactly the bytes found on disk.
it('should not strip anything that looks like a UTF-8-encoded BOM from UTF-16-BE-encoded files', function(done) {
  // The same expectation holds for every non-UTF-8 encoding; covering
  // UTF-16-BE and UTF-16-LE is considered enough to demonstrate it.
  var fixturePath = path.join(__dirname, "./fixtures/bom-utf16be.txt");
  var bytesOnDisk = fs.readFileSync(fixturePath);
  var collected = [];

  // Runs once the collecting stream is flushed: exactly one file is expected.
  function verify() {
    var emitted;
    collected.length.should.equal(1);
    emitted = collected[0];
    should.exist(emitted.stat);
    emitted.path.should.equal(fixturePath);
    emitted.isBuffer().should.equal(true);
    bufEqual(emitted.contents, bytesOnDisk).should.equal(true);
    done();
  }

  var stream = vfs.src("./fixtures/bom-utf16be.txt", {cwd: __dirname});

  var collect = collected.push.bind(collected);
  bufferStream = through.obj(dataWrap(collect), verify);
  stream.pipe(bufferStream);
});

// Reads the UTF-16-LE fixture through vfs.src and checks that the single
// emitted file carries exactly the bytes found on disk.
it('should not strip anything that looks like a UTF-8-encoded BOM from UTF-16-LE-encoded files', function(done) {
  // The same expectation holds for every non-UTF-8 encoding; covering
  // UTF-16-BE and UTF-16-LE is considered enough to demonstrate it.
  var fixturePath = path.join(__dirname, "./fixtures/bom-utf16le.txt");
  var bytesOnDisk = fs.readFileSync(fixturePath);
  var collected = [];

  // Runs once the collecting stream is flushed: exactly one file is expected.
  function verify() {
    var emitted;
    collected.length.should.equal(1);
    emitted = collected[0];
    should.exist(emitted.stat);
    emitted.path.should.equal(fixturePath);
    emitted.isBuffer().should.equal(true);
    bufEqual(emitted.contents, bytesOnDisk).should.equal(true);
    done();
  }

  var stream = vfs.src("./fixtures/bom-utf16le.txt", {cwd: __dirname});

  var collect = collected.push.bind(collected);
  bufferStream = through.obj(dataWrap(collect), verify);
  stream.pipe(bufferStream);
});

it('should glob a file with default settings', function(done) {
var expectedPath = path.join(__dirname, "./fixtures/test.coffee");
var expectedContent = fs.readFileSync(expectedPath);
Expand Down Expand Up @@ -178,7 +244,7 @@ describe('source stream', function() {
};

var stream = vfs.src("./fixtures/*.coffee", {cwd: __dirname, buffer: false});

var buffered = [];
bufferStream = through.obj(dataWrap(buffered.push.bind(buffered)), onEnd);
stream.pipe(bufferStream);
Expand Down

0 comments on commit 4c1fc02

Please sign in to comment.