Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: Buffer.lastIndexOf #4846

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions doc/api/buffer.md
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,46 @@ for (var key of buf.keys()) {
// 5
```

### buf.lastIndexOf(value[, byteOffset][, encoding])

* `value` {String|Buffer|Number}
* `byteOffset` {Number} Default: `buf.length`
* `encoding` {String} Default: `'utf8'`
* Return: {Number}

Identical to [`Buffer#indexOf()`][], but searches the Buffer from back to front
instead of front to back. Returns the starting index position of `value` in
Buffer or `-1` if the Buffer does not contain `value`. The `value` can be a
String, Buffer or Number. Strings are by default interpreted as UTF8. If
`byteOffset` is provided, will return the last match that begins at or before
`byteOffset`.

```js
const buf = new Buffer('this buffer is a buffer');

buf.lastIndexOf('this');
// returns 0
buf.lastIndexOf('buffer');
// returns 17
buf.lastIndexOf(new Buffer('buffer'));
// returns 17
buf.lastIndexOf(97); // ascii for 'a'
// returns 15
buf.lastIndexOf(new Buffer('yolo'));
// returns -1
buf.lastIndexOf('buffer', 5)
// returns 5
buf.lastIndexOf('buffer', 4)
// returns -1

const utf16Buffer = new Buffer('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2');

utf16Buffer.lastIndexOf('\u03a3', null, 'ucs2');
// returns 6
utf16Buffer.lastIndexOf('\u03a3', -5, 'ucs2');
// returns 4
```

### buf.length

* {Number}
Expand Down
72 changes: 49 additions & 23 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,48 @@ Buffer.prototype.compare = function compare(target,
return binding.compareOffset(this, target, start, thisStart, end, thisEnd);
};

function slowIndexOf(buffer, val, byteOffset, encoding) {

// Finds either the first index of `val` in `buffer` at offset >= `byteOffset`,
// OR the last index of `val` in `buffer` at offset <= `byteOffset`.
//
// Arguments:
// - buffer - a Buffer to search
// - val - a string, Buffer, or number
// - byteOffset - an index into `buffer`; will be clamped to an int32
// - encoding - an optional encoding, relevant is val is a string
// - dir - true for indexOf, false for lastIndexOf
function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) {
if (typeof byteOffset === 'string') {
encoding = byteOffset;
byteOffset = undefined;
} else if (byteOffset > 0x7fffffff) {
byteOffset = 0x7fffffff;
} else if (byteOffset < -0x80000000) {
byteOffset = -0x80000000;
}
byteOffset = +byteOffset; // Coerce to Number.
if (isNaN(byteOffset)) {
// If the offset is undefined, null, NaN, "foo", etc, search whole buffer.
byteOffset = dir ? 0 : (buffer.length - 1);
}
dir = !!dir; // Cast to bool.

if (typeof val === 'string') {
if (encoding === undefined) {
return binding.indexOfString(buffer, val, byteOffset, encoding, dir);
}
return slowIndexOf(buffer, val, byteOffset, encoding, dir);
} else if (val instanceof Buffer) {
return binding.indexOfBuffer(buffer, val, byteOffset, encoding, dir);
} else if (typeof val === 'number') {
return binding.indexOfNumber(buffer, val, byteOffset, dir);
}

throw new TypeError('"val" argument must be string, number or Buffer');
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

side note: more sure way here would be to pass all remaining values to C++, check if node::Buffer::HasInstance() or args[1]->IsNumber() then branch. Otherwise env()->ThrowTypeError(). Would get around potential proto munging that causes instanceof to incorrectly return true.

Don't bother making the change. Just making the note.

}


function slowIndexOf(buffer, val, byteOffset, encoding, dir) {
var loweredCase = false;
for (;;) {
switch (encoding) {
Expand All @@ -609,13 +650,13 @@ function slowIndexOf(buffer, val, byteOffset, encoding) {
case 'utf16le':
case 'utf-16le':
case 'binary':
return binding.indexOfString(buffer, val, byteOffset, encoding);
return binding.indexOfString(buffer, val, byteOffset, encoding, dir);

case 'base64':
case 'ascii':
case 'hex':
return binding.indexOfBuffer(
buffer, Buffer.from(val, encoding), byteOffset, encoding);
buffer, Buffer.from(val, encoding), byteOffset, encoding, dir);

default:
if (loweredCase) {
Expand All @@ -628,29 +669,14 @@ function slowIndexOf(buffer, val, byteOffset, encoding) {
}
}


Buffer.prototype.indexOf = function indexOf(val, byteOffset, encoding) {
if (typeof byteOffset === 'string') {
encoding = byteOffset;
byteOffset = 0;
} else if (byteOffset > 0x7fffffff) {
byteOffset = 0x7fffffff;
} else if (byteOffset < -0x80000000) {
byteOffset = -0x80000000;
}
byteOffset >>= 0;
return bidirectionalIndexOf(this, val, byteOffset, encoding, true);
};

if (typeof val === 'string') {
if (encoding === undefined) {
return binding.indexOfString(this, val, byteOffset, encoding);
}
return slowIndexOf(this, val, byteOffset, encoding);
} else if (val instanceof Buffer) {
return binding.indexOfBuffer(this, val, byteOffset, encoding);
} else if (typeof val === 'number') {
return binding.indexOfNumber(this, val, byteOffset);
}

throw new TypeError('"val" argument must be string, number or Buffer');
Buffer.prototype.lastIndexOf = function lastIndexOf(val, byteOffset, encoding) {
return bidirectionalIndexOf(this, val, byteOffset, encoding, false);
};


Expand Down
128 changes: 80 additions & 48 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -944,9 +944,44 @@ void Compare(const FunctionCallbackInfo<Value> &args) {
}


// Computes the offset for starting an indexOf or lastIndexOf search.
// Returns either a valid offset in [0...<length - 1>], ie inside the Buffer,
// or -1 to signal that there is no possible match.
int64_t IndexOfOffset(size_t length, int64_t offset_i64, bool is_forward) {
int64_t length_i64 = static_cast<int64_t>(length);
if (length_i64 == 0) {
// Empty buffer, no match.
return -1;
}
if (offset_i64 < 0) {
if (offset_i64 + length_i64 >= 0) {
// Negative offsets count backwards from the end of the buffer.
return length_i64 + offset_i64;
} else if (is_forward) {
// indexOf from before the start of the buffer: search the whole buffer.
return 0;
} else {
// lastIndexOf from before the start of the buffer: no match.
return -1;
}
} else {
if (offset_i64 < length_i64) {
// Valid positive offset.
return offset_i64;
} else if (is_forward) {
// indexOf from past the end of the buffer: no match.
return -1;
} else {
// lastIndexOf from past the end of the buffer: search the whole buffer.
return length_i64 - 1;
}
}
}

void IndexOfString(const FunctionCallbackInfo<Value>& args) {
ASSERT(args[1]->IsString());
ASSERT(args[2]->IsNumber());
ASSERT(args[4]->IsBoolean());

enum encoding enc = ParseEncoding(args.GetIsolate(),
args[3],
Expand All @@ -956,31 +991,26 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
SPREAD_ARG(args[0], ts_obj);

Local<String> needle = args[1].As<String>();
int64_t offset_i64 = args[2]->IntegerValue();
bool is_forward = args[4]->IsTrue();

const char* haystack = ts_obj_data;
const size_t haystack_length = ts_obj_length;
// Extended latin-1 characters are 2 bytes in Utf8.
const size_t needle_length =
enc == BINARY ? needle->Length() : needle->Utf8Length();


if (needle_length == 0 || haystack_length == 0) {
return args.GetReturnValue().Set(-1);
}

int64_t offset_i64 = args[2]->IntegerValue();
size_t offset = 0;

if (offset_i64 < 0) {
if (offset_i64 + static_cast<int64_t>(haystack_length) < 0) {
offset = 0;
} else {
offset = static_cast<size_t>(haystack_length + offset_i64);
}
} else {
offset = static_cast<size_t>(offset_i64);
int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, is_forward);
if (opt_offset <= -1) {
return args.GetReturnValue().Set(-1);
}

if (haystack_length < offset || needle_length + offset > haystack_length) {
size_t offset = static_cast<size_t>(opt_offset);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a paranoid person. Mind adding a CHECK_LT(offset, haystack_length); just after this cast? Then also changing this conditional above from opt_offset == -1 to opt_offset <= -1. That should help handle accidental under/overflow cases.

There's one other place where this is applicable. Will make a note when I get to it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, done

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@trevnorris added CHECK_LT in the three places where I factored out IndexOfOffset

CHECK_LT(offset, haystack_length);
if (is_forward && needle_length + offset > haystack_length) {
return args.GetReturnValue().Set(-1);
}

Expand Down Expand Up @@ -1008,13 +1038,15 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
haystack_length / 2,
decoded_string,
decoder.size() / 2,
offset / 2);
offset / 2,
is_forward);
} else {
result = SearchString(reinterpret_cast<const uint16_t*>(haystack),
haystack_length / 2,
reinterpret_cast<const uint16_t*>(*needle_value),
needle_value.length(),
offset / 2);
offset / 2,
is_forward);
}
result *= 2;
} else if (enc == UTF8) {
Expand All @@ -1026,7 +1058,8 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
haystack_length,
reinterpret_cast<const uint8_t*>(*needle_value),
needle_length,
offset);
offset,
is_forward);
} else if (enc == BINARY) {
uint8_t* needle_data = static_cast<uint8_t*>(malloc(needle_length));
if (needle_data == nullptr) {
Expand All @@ -1039,7 +1072,8 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
haystack_length,
needle_data,
needle_length,
offset);
offset,
is_forward);
free(needle_data);
}

Expand All @@ -1050,17 +1084,18 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
ASSERT(args[1]->IsObject());
ASSERT(args[2]->IsNumber());
ASSERT(args[4]->IsBoolean());

enum encoding enc = ParseEncoding(args.GetIsolate(),
args[3],
UTF8);

THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to place THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);. The value passed to lastIndexOf() could have had it's prototype messed with, resulting in the following:

$ ./node -e 'Buffer(4).lastIndexOf({__proto__: Buffer.prototype})'
node: ../src/node_buffer.cc:985: void node::Buffer::IndexOfBuffer(const FunctionCallbackInfo<v8::Value> &): Assertion `(args[1])->IsUint8Array()' failed.
Aborted (core dumped)

And even though they're messing around in a way they shouldn't, it still shouldn't be possible to cause an abort using the JS API.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, i can add that, but how did this work before? IndexOfBuffer was used by indexOf before this PR and didn't do the check

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@trevnorris added

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup. It crashed before. Thanks for bringing it to my attention. :)

THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_ARG(args[1], buf);

if (buf_length > 0)
CHECK_NE(buf_data, nullptr);
int64_t offset_i64 = args[2]->IntegerValue();
bool is_forward = args[4]->IsTrue();

const char* haystack = ts_obj_data;
const size_t haystack_length = ts_obj_length;
Expand All @@ -1071,19 +1106,13 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
return args.GetReturnValue().Set(-1);
}

int64_t offset_i64 = args[2]->IntegerValue();
size_t offset = 0;

if (offset_i64 < 0) {
if (offset_i64 + static_cast<int64_t>(haystack_length) < 0)
offset = 0;
else
offset = static_cast<size_t>(haystack_length + offset_i64);
} else {
offset = static_cast<size_t>(offset_i64);
int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, is_forward);
if (opt_offset <= -1) {
return args.GetReturnValue().Set(-1);
}

if (haystack_length < offset || needle_length + offset > haystack_length) {
size_t offset = static_cast<size_t>(opt_offset);
CHECK_LT(offset, haystack_length);
if (is_forward && needle_length + offset > haystack_length) {
return args.GetReturnValue().Set(-1);
}

Expand All @@ -1098,15 +1127,17 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
haystack_length / 2,
reinterpret_cast<const uint16_t*>(needle),
needle_length / 2,
offset / 2);
offset / 2,
is_forward);
result *= 2;
} else {
result = SearchString(
reinterpret_cast<const uint8_t*>(haystack),
haystack_length,
reinterpret_cast<const uint8_t*>(needle),
needle_length,
offset);
offset,
is_forward);
}

args.GetReturnValue().Set(
Expand All @@ -1116,28 +1147,29 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
ASSERT(args[1]->IsNumber());
ASSERT(args[2]->IsNumber());
ASSERT(args[3]->IsBoolean());

THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
SPREAD_ARG(args[0], ts_obj);

uint32_t needle = args[1]->Uint32Value();
int64_t offset_i64 = args[2]->IntegerValue();
size_t offset;

if (offset_i64 < 0) {
if (offset_i64 + static_cast<int64_t>(ts_obj_length) < 0)
offset = 0;
else
offset = static_cast<size_t>(ts_obj_length + offset_i64);
} else {
offset = static_cast<size_t>(offset_i64);
}
bool is_forward = args[3]->IsTrue();

if (ts_obj_length == 0 || offset + 1 > ts_obj_length)
int64_t opt_offset = IndexOfOffset(ts_obj_length, offset_i64, is_forward);
if (opt_offset <= -1) {
return args.GetReturnValue().Set(-1);
}
size_t offset = static_cast<size_t>(opt_offset);
CHECK_LT(offset, ts_obj_length);

void* ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset);
char* ptr_char = static_cast<char*>(ptr);
const void* ptr;
if (is_forward) {
ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset);
} else {
ptr = node::stringsearch::MemrchrFill(ts_obj_data, needle, offset + 1);
}
const char* ptr_char = static_cast<const char*>(ptr);
args.GetReturnValue().Set(ptr ? static_cast<int>(ptr_char - ts_obj_data)
: -1);
}
Expand Down
Loading