Skip to content

Commit

Permalink
Align Buffer instances
Browse files Browse the repository at this point in the history
This works around TooTallNate/ref#28
and may result in a general performance increase.
  • Loading branch information
addaleax committed Mar 28, 2015
1 parent 7fed565 commit e5b7528
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
9 changes: 8 additions & 1 deletion lib/node-lapack/fortranArray.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ function jsMatrixToFortranArray(array, elementSize) {
var op = elementSize == 8 ? 'writeDoubleLE' : 'writeFloatLE';
var m = array.length;
var n = array[0].length;
var fortranArrayStart = fortranArray = new Buffer(m * n * elementSize);
var fortranArrayStart = fortranArray = alignedBuffer(m * n * elementSize);
for(var j = 0; j < n; j++) {
for(var i = 0; i < m; i++) {
fortranArray[op](array[i][j], elementSize * (j * m + i));
Expand All @@ -66,6 +66,13 @@ function fortranArrayToJSArray(fortranArray, n, op, elementSize) {
return array;
}

/**
 * Return a Buffer of exactly `size` bytes whose start address is a multiple
 * of 16, i.e. suitably aligned for SSE operations.
 *
 * The backing allocation is 16 bytes larger than requested so that an
 * aligned start address is guaranteed to exist within it; the result is a
 * slice (a view sharing memory) of that larger allocation.
 *
 * NOTE(review): relies on `Buffer.prototype.address()`, which is not a core
 * Node API — presumably supplied by the `ref` module loaded via `ffi`;
 * confirm it is loaded before this function is called.
 *
 * @param {number} size - number of usable bytes required
 * @returns {Buffer} an aligned view of length `size`
 */
function alignedBuffer(size) {
var ALIGNMENT = 16;
var buffer = new Buffer(size + ALIGNMENT);
// Bytes to skip to reach the next aligned address (0 when already aligned).
var offset = (ALIGNMENT - (buffer.address() % ALIGNMENT)) % ALIGNMENT;
// Bound the slice explicitly: an open-ended slice would expose the unused
// padding bytes and make the reported length larger than requested.
return buffer.slice(offset, offset + size);
}

module.exports.fortranArrayToJSMatrix = fortranArrayToJSMatrix;
module.exports.jsMatrixToFortranArray = jsMatrixToFortranArray;
module.exports.fortranArrayToJSArray = fortranArrayToJSArray;
module.exports.alignedBuffer = alignedBuffer;
17 changes: 9 additions & 8 deletions lib/node-lapack/lapack.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ THE SOFTWARE.
*/

var fortranArray = require('./fortranArray');
var alignedBuffer = fortranArray.alignedBuffer;
var FFI = require('ffi');

var LAPACK;
Expand Down Expand Up @@ -138,19 +139,19 @@ function geqrf(matrix, op, lapackFunc, elementSize, callback) {
var qr;

matrixOp(matrix, elementSize, function(m, n, f_m, f_n, f_a, f_lda) {
var f_tau = new Buffer(m * n * elementSize);
var f_tau = alignedBuffer(m * n * elementSize);
var f_info = new Buffer(FORTRAN_INT);
var f_lwork = new Buffer(FORTRAN_INT);
var f_work;
f_lwork.writeInt32LE(-1, 0);

// get optimal size of workspace
f_work = new Buffer(FORTRAN_DOUBLE);
f_work = alignedBuffer(FORTRAN_DOUBLE);
lapackFunc(f_m, f_n, f_a, f_lda, f_tau, f_work, f_lwork, f_info);
lwork = f_work[op](0);

// allocate workspace
f_work = new Buffer(lwork * elementSize);
f_work = alignedBuffer(lwork * elementSize);
f_lwork.writeInt32LE(lwork, 0);

// perform QR decomp
Expand Down Expand Up @@ -257,18 +258,18 @@ function sgesvd(jobu, jobvt, matrix) {
var svd;

matrixOp(matrix, FORTRAN_FLOAT, function(m, n, f_m, f_n, f_a, f_lda) {
var f_s = new Buffer(Math.pow(Math.min(m, n), 2) * FORTRAN_FLOAT);
var f_u = new Buffer(Math.pow(m, 2) * FORTRAN_FLOAT);
var f_s = alignedBuffer(Math.pow(Math.min(m, n), 2) * FORTRAN_FLOAT);
var f_u = alignedBuffer(Math.pow(m, 2) * FORTRAN_FLOAT);
var f_ldu = new Buffer(FORTRAN_INT);
f_ldu.writeInt32LE(m, 0);

// TODO: punting on dims for now. revisit with http://www.netlib.org/lapack/single/sgesvd.f
var f_vt = new Buffer(Math.pow(n, 2) * FORTRAN_FLOAT);
var f_vt = alignedBuffer(Math.pow(n, 2) * FORTRAN_FLOAT);
var f_ldvt = new Buffer(FORTRAN_INT);
f_ldvt.writeInt32LE(n, 0);

var lwork = -1;
var f_work = new Buffer(FORTRAN_FLOAT);
var f_work = alignedBuffer(FORTRAN_FLOAT);
var f_lwork = new Buffer(FORTRAN_INT);
f_lwork.writeInt32LE(lwork, 0);
var f_info = new Buffer(FORTRAN_INT);
Expand All @@ -277,7 +278,7 @@ function sgesvd(jobu, jobvt, matrix) {
f_work, f_lwork, f_info);

lwork = f_work.readFloatLE(0);
f_work = new Buffer(lwork * FORTRAN_FLOAT);
f_work = alignedBuffer(lwork * FORTRAN_FLOAT);
f_lwork.writeInt32LE(lwork, 0);

LAPACK.sgesvd_(f_jobu, f_jobvt, f_m, f_n, f_a, f_lda, f_s, f_u, f_ldu, f_vt, f_ldvt,
Expand Down

0 comments on commit e5b7528

Please sign in to comment.