diff --git a/lib/header.js b/lib/header.js index 23637a5..6cb90b1 100644 --- a/lib/header.js +++ b/lib/header.js @@ -1,41 +1,75 @@ -// Generated by CoffeeScript 1.7.1 +// Generated by CoffeeScript 1.9.2 (function() { var Header, fs, - __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; + bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; fs = require('fs'); Header = (function() { - function Header(filename) { - this.filename = filename; - this.parseFieldSubRecord = __bind(this.parseFieldSubRecord, this); - this.parseDate = __bind(this.parseDate, this); + function Header(stream) { + this.stream = stream; + this.parseFieldSubRecord = bind(this.parseFieldSubRecord, this); + this.parseDate = bind(this.parseDate, this); return this; } Header.prototype.parse = function(callback) { - return fs.readFile(this.filename, (function(_this) { - return function(err, buffer) { - var i; - if (err) { - throw err; + var _STATE_DONE, _STATE_FIELDS, _STATE_FINISHING, _STATE_HEADER, doParse; + _STATE_HEADER = 1; + _STATE_FIELDS = 2; + _STATE_FINISHING = 3; + _STATE_DONE = 4; + this.state = _STATE_HEADER; + this.index = 0; + this.fields = []; + doParse = (function(_this) { + return function() { + var buffer, delta, fieldHeaderSize, ranout; + ranout = false; + if (_this.state === _STATE_HEADER) { + buffer = _this.stream.read(32); + if (buffer === null) { + return; + } + _this.index = 32; + _this.type = (buffer.slice(0, 1)).toString('utf-8'); + _this.dateUpdated = _this.parseDate(buffer.slice(1, 4)); + _this.numberOfRecords = _this.convertBinaryToInteger(buffer.slice(4, 8)); + _this.start = _this.convertBinaryToInteger(buffer.slice(8, 10)); + _this.recordLength = _this.convertBinaryToInteger(buffer.slice(10, 12)); + _this.state = _STATE_FIELDS; + } + if (_this.state === _STATE_FIELDS) { + fieldHeaderSize = 32; + while (buffer = _this.stream.read(fieldHeaderSize)) { + _this.index += fieldHeaderSize; + if (buffer[0] === 0x0D) { + _this.state = _STATE_FINISHING; + break; + } + _this.fields.push(_this.parseFieldSubRecord(buffer)); + } } - _this.type = (buffer.slice(0, 1)).toString('utf-8'); - _this.dateUpdated = _this.parseDate(buffer.slice(1, 4)); - _this.numberOfRecords = _this.convertBinaryToInteger(buffer.slice(4, 8)); - _this.start = _this.convertBinaryToInteger(buffer.slice(8, 10)); - _this.recordLength = _this.convertBinaryToInteger(buffer.slice(10, 12)); - _this.fields = ((function() { - var _i, _ref, _results; - _results = []; - for (i = _i = 32, _ref = this.start - 32; _i <= _ref; i = _i += 32) { - _results.push(buffer.slice(i, i + 32)); + if (_this.state === _STATE_FINISHING) { + delta = _this.start - _this.index; + if (delta > 0) { + buffer = _this.stream.read(delta); + if (buffer === null) { + return; + } + } else if (delta < 0) { + buffer = buffer.slice(delta); + _this.stream.unshift(buffer); } - return _results; - }).call(_this)).map(_this.parseFieldSubRecord); - return callback(_this); + _this.state = _STATE_DONE; + } + if (_this.state === _STATE_DONE) { + _this.stream.removeListener('readable', doParse); + return callback(_this); + } }; - })(this)); + })(this); + return this.stream.on('readable', doParse); }; Header.prototype.parseDate = function(buffer) { diff --git a/lib/parser.js b/lib/parser.js index 9556918..d405d56 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -1,9 +1,9 @@ -// Generated by CoffeeScript 1.7.1 +// Generated by CoffeeScript 1.9.2 (function() { - var EventEmitter, Header, Parser, fs, - __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }, - __hasProp = {}.hasOwnProperty, - __extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; }; + var EventEmitter, Header, Parser, fs, stream, + bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }, + extend = function(child, parent) { for (var key in parent) { if (hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; }, + hasProp = {}.hasOwnProperty; EventEmitter = require('events').EventEmitter; @@ -11,91 +11,80 @@ fs = require('fs'); - Parser = (function(_super) { - __extends(Parser, _super); + stream = require('stream'); + + Parser = (function(superClass) { + extend(Parser, superClass); function Parser(filename) { this.filename = filename; - this.parseField = __bind(this.parseField, this); - this.parseRecord = __bind(this.parseRecord, this); - this.resume = __bind(this.resume, this); - this.pause = __bind(this.pause, this); - this.parse = __bind(this.parse, this); + this.resume = bind(this.resume, this); + this.pause = bind(this.pause, this); + this.parseField = bind(this.parseField, this); + this.parseRecord = bind(this.parseRecord, this); + this.parse = bind(this.parse, this); } Parser.prototype.parse = function() { this.emit('start', this); - this.header = new Header(this.filename); + if (this.filename instanceof stream.Stream) { + stream = this.filename; + } else { + stream = fs.createReadStream(this.filename); + } + stream.once('end', (function(_this) { + return function() { + return _this.emit('end'); + }; + })(this)); + this.header = new Header(stream); this.header.parse((function(_this) { return function(err) { - var bufLoc, loc, overflow, sequenceNumber, stream; + var sequenceNumber; _this.emit('header', _this.header); sequenceNumber = 0; - loc = _this.header.start; - bufLoc = _this.header.start; - overflow = null; - _this.paused = false; - stream = fs.createReadStream(_this.filename); _this.readBuf = function() { - var buffer; + var buffer, results; if (_this.paused) { _this.emit('paused'); return; } - while (buffer = stream.read()) { - if (bufLoc !== _this.header.start) { - bufLoc = 0; - } - if (overflow !== null) { - buffer = overflow + buffer; - } - while (loc < (_this.header.start + _this.header.numberOfRecords * _this.header.recordLength) && (bufLoc + _this.header.recordLength) <= buffer.length) { - _this.emit('record', _this.parseRecord(++sequenceNumber, buffer.slice(bufLoc, bufLoc += _this.header.recordLength))); - } - loc += bufLoc; - if (bufLoc < buffer.length) { - overflow = buffer.slice(bufLoc, buffer.length); + results = []; + while (!_this.done && (buffer = stream.read(_this.header.recordLength))) { + if (buffer[0] === 0x1A) { + results.push(_this.done = true); + } else if (buffer.length === _this.header.recordLength) { + results.push(_this.emit('record', _this.parseRecord(++sequenceNumber, buffer))); } else { - overflow = null; + results.push(void 0); } - return _this; } + return results; }; stream.on('readable', _this.readBuf); - return stream.on('end', function() { - return _this.emit('end'); - }); + _this.readBuf(); + return _this; }; })(this)); return this; }; - Parser.prototype.pause = function() { - return this.paused = true; - }; - - Parser.prototype.resume = function() { - this.paused = false; - this.emit('resuming'); - return this.readBuf(); - }; - Parser.prototype.parseRecord = function(sequenceNumber, buffer) { - var field, loc, record, _fn, _i, _len, _ref; + var field, fn, i, len, loc, record, ref; record = { '@sequenceNumber': sequenceNumber, '@deleted': (buffer.slice(0, 1))[0] !== 32 }; loc = 1; - _ref = this.header.fields; - _fn = (function(_this) { + ref = this.header.fields; + fn = (function(_this) { return function(field) { return record[field.name] = _this.parseField(field, buffer.slice(loc, loc += field.length)); }; })(this); - for (_i = 0, _len = _ref.length; _i < _len; _i++) { - field = _ref[_i]; - _fn(field); + for (i = 0, len = ref.length; i < len; i++) { + field = ref[i]; + fn(field); } return record; }; @@ -109,6 +98,16 @@ return value; }; + Parser.prototype.pause = function() { + return this.paused = true; + }; + + Parser.prototype.resume = function() { + this.paused = false; + this.emit('resuming'); + return this.readBuf(); + }; + return Parser; })(EventEmitter); diff --git a/package.json b/package.json index e84016f..d49a8d0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,7 @@ + { "name": "node-dbf", - "version": "0.1.1", + "version": "0.1.2", "description": "An efficient dBase DBF file parser written in pure JavaScript", "main": "./lib/parser.js", "repository": { diff --git a/src/header.coffee b/src/header.coffee index 86080fb..763a834 100644 --- a/src/header.coffee +++ b/src/header.coffee @@ -2,25 +2,63 @@ fs = require 'fs' class Header - constructor: (@filename) -> + constructor: (@stream) -> return @ parse: (callback) -> - fs.readFile @filename, (err, buffer) => - throw err if err + _STATE_HEADER = 1 + _STATE_FIELDS = 2 + _STATE_FINISHING = 3 + _STATE_DONE = 4 + @state = _STATE_HEADER + @index = 0 + @fields = [] - @type = (buffer.slice 0, 1).toString 'utf-8' - @dateUpdated = @parseDate (buffer.slice 1, 4) - @numberOfRecords = @convertBinaryToInteger (buffer.slice 4, 8) - @start = @convertBinaryToInteger (buffer.slice 8, 10) - @recordLength = @convertBinaryToInteger (buffer.slice 10, 12) + doParse = () => + ranout = false + if @state is _STATE_HEADER + buffer = @stream.read 32 + if buffer is null + return + + @index = 32 + @type = (buffer.slice 0, 1).toString 'utf-8' + @dateUpdated = @parseDate (buffer.slice 1, 4) + @numberOfRecords = @convertBinaryToInteger (buffer.slice 4, 8) + @start = @convertBinaryToInteger (buffer.slice 8, 10) + @recordLength = @convertBinaryToInteger (buffer.slice 10, 12) - @fields = (buffer.slice i, i+32 for i in [32 .. @start - 32] by 32).map @parseFieldSubRecord + @state = _STATE_FIELDS - callback @ + if @state is _STATE_FIELDS + fieldHeaderSize = 32 + while buffer = @stream.read fieldHeaderSize + @index += fieldHeaderSize + if buffer[0] == 0x0D + @state = _STATE_FINISHING + break + @fields.push @parseFieldSubRecord buffer + + if @state is _STATE_FINISHING + delta = @start - @index + if delta > 0 + # Read up to start + buffer = @stream.read delta + if buffer is null + return + else if delta < 0 + # We read too much, so put some data back on the stream. + buffer = buffer.slice delta + @stream.unshift buffer + @state = _STATE_DONE + + if @state is _STATE_DONE + @stream.removeListener 'readable', doParse + callback @ + + @stream.on 'readable', doParse parseDate: (buffer) => - console.log @convertBinaryToInteger buffer.slice 0, 1 year = 1900 + @convertBinaryToInteger buffer.slice 0, 1 month = (@convertBinaryToInteger buffer.slice 1, 2) - 1 day = @convertBinaryToInteger buffer.slice 2, 3 @@ -39,4 +77,4 @@ class Header convertBinaryToInteger: (buffer) -> return buffer.readInt32LE 0, true -module.exports = Header \ No newline at end of file +module.exports = Header diff --git a/src/parser.coffee b/src/parser.coffee index 473763c..d96d99f 100644 --- a/src/parser.coffee +++ b/src/parser.coffee @@ -1,6 +1,7 @@ {EventEmitter} = require 'events' Header = require './header' fs = require 'fs' +stream = require 'stream' class Parser extends EventEmitter @@ -9,64 +10,40 @@ class Parser extends EventEmitter parse: => @emit 'start', @ - @header = new Header @filename - @header.parse (err) => + if @filename instanceof stream.Stream + stream = @filename + else + stream = fs.createReadStream @filename + + stream.once 'end', () => + @emit 'end' + @header = new Header stream + @header.parse (err) => @emit 'header', @header sequenceNumber = 0 - loc = @header.start - bufLoc = @header.start - overflow = null - @paused = false - - stream = fs.createReadStream @filename - @readBuf = => - if @paused - + @emit 'paused' return - - while buffer = stream.read() - - if bufLoc isnt @header.start then bufLoc = 0 - - if overflow isnt null then buffer = overflow + buffer - - while loc < (@header.start + @header.numberOfRecords * @header.recordLength) && (bufLoc + @header.recordLength) <= buffer.length - - @emit 'record', @parseRecord ++sequenceNumber, buffer.slice bufLoc, bufLoc += @header.recordLength - - loc += bufLoc - - if bufLoc < buffer.length then overflow = buffer.slice bufLoc, buffer.length else overflow = null - - return @ - + while !@done and (buffer = stream.read @header.recordLength) + if buffer[0] == 0x1A + @done = true + else if buffer.length == @header.recordLength + @emit 'record', @parseRecord ++sequenceNumber, buffer + stream.on 'readable',@readBuf - - stream.on 'end', () => - - @emit 'end' + + do @readBuf + + return @ return @ - pause: => - - @paused = true - - resume: => - - @paused = false - - @emit 'resuming' - - do @readBuf - parseRecord: (sequenceNumber, buffer) => record = { '@sequenceNumber': sequenceNumber @@ -87,4 +64,16 @@ class Parser extends EventEmitter return value + pause: => + + @paused = true + + resume: => + + @paused = false + + @emit 'resuming' + + do @readBuf + module.exports = Parser diff --git a/test/test.js b/test/test.js new file mode 100755 index 0000000..f66c5dc --- /dev/null +++ b/test/test.js @@ -0,0 +1,14 @@ +#!/usr/bin/env node +"use strict"; +var fs = require('fs'); +var DB = require(__dirname + '/../lib/parser'); + +var db = new DB(fs.createReadStream(process.argv[2])); + +db.on('header', function(x) { + console.warn("Header", x); +}); +db.on('record', console.warn); +db.on('end', function() { console.warn("Done!"); }); + +db.parse();