Skip to content

Commit

Permalink
Fixes GH-63: BufferUtils.ensureCapacity now clears the input buffer. …
Browse files Browse the repository at this point in the history
…This

also affects WordData methods that accept a reusable byte buffer -- it is now always cleared prior to being flipped and returned.
  • Loading branch information
dweiss committed Feb 17, 2016
1 parent b48255d commit f9592c5
Show file tree
Hide file tree
Showing 13 changed files with 65 additions and 79 deletions.
6 changes: 6 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ New Features

* GH-73: Update Polish stemming dictionaries to polimorfologik 2.1. (Dawid Weiss)

Other Changes

* GH-63: BufferUtils.ensureCapacity now clears the input buffer. This also
affects WordData methods that accept a reusable byte buffer -- it is now
always cleared prior to being flipped and returned. (Dawid Weiss)

======================= morfologik-stemming 2.0.2-SNAPSHOT =======================

Bug Fixes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ public void testWordDataFields() throws IOException {
// The buffer should be present in stems set.
assertTrue(stem, stems.contains(stem));
// Buffer large enough to hold the contents.
temp.clear();
assertSame(temp, wd.getStemBytes(temp));
// The copy and the clone should be identical.
assertEquals(0, copy.compareTo(temp));
Expand Down
12 changes: 4 additions & 8 deletions morfologik-speller/src/main/java/morfologik/speller/Speller.java
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,7 @@ private ByteBuffer charsToBytes(final CharBuffer chars, ByteBuffer bytes) {

private ByteBuffer charSequenceToBytes(final CharSequence word) {
// Encode word characters into bytes in the same encoding as the FSA's.
charBuffer.clear();
charBuffer = BufferUtils.ensureCapacity(charBuffer, word.length());
charBuffer = BufferUtils.clearAndEnsureCapacity(charBuffer, word.length());
for (int i = 0; i < word.length(); i++) {
final char chr = word.charAt(i);
charBuffer.put(chr);
Expand Down Expand Up @@ -416,10 +415,8 @@ public List<String> findReplacements(final String w) {
candidate = new char[MAX_WORD_LENGTH];
candLen = candidate.length;
effectEditDistance = wordLen <= editDistance ? wordLen - 1 : editDistance;
charBuffer = BufferUtils.ensureCapacity(charBuffer, MAX_WORD_LENGTH);
byteBuffer = BufferUtils.ensureCapacity(byteBuffer, MAX_WORD_LENGTH);
charBuffer.clear();
byteBuffer.clear();
charBuffer = BufferUtils.clearAndEnsureCapacity(charBuffer, MAX_WORD_LENGTH);
byteBuffer = BufferUtils.clearAndEnsureCapacity(byteBuffer, MAX_WORD_LENGTH);
final byte[] prevBytes = new byte[0];
findRepl(0, fsa.getRootNode(), prevBytes, 0, 0);
}
Expand All @@ -443,8 +440,7 @@ private void findRepl(final int depth, final int node, final byte[] prevBytes, f
// char separatorChar = dictionaryMetadata.getSeparatorAsChar();
int dist = 0;
for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
byteBuffer = BufferUtils.ensureCapacity(byteBuffer, prevBytes.length + 1);
byteBuffer.clear();
byteBuffer = BufferUtils.clearAndEnsureCapacity(byteBuffer, prevBytes.length + 1);
byteBuffer.put(prevBytes);
byteBuffer.put(fsa.getArcLabel(arc));
final int bufPos = byteBuffer.position();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,45 @@ private BufferUtils() {
}

/**
* Ensure the byte buffer's capacity. If a new buffer is allocated, its
* content is empty (the old buffer's contents is not copied).
* Ensure the buffer's capacity is large enough to hold a given number
* of elements. If the input buffer is not large enough, a new buffer is allocated
* and returned.
*
* @param capacity The required capacity of the buffer.
* @param elements The required number of elements to be appended to the buffer.
*
* @param buffer
* The buffer to check or <code>null</code> if a new buffer should be
* allocated.
*
* @return Returns the same buffer or a new buffer with the given capacity.
*/
public static ByteBuffer ensureCapacity(ByteBuffer buffer, int capacity) {
// TODO: GH-63
if (buffer == null || buffer.capacity() < capacity) {
buffer = ByteBuffer.allocate(capacity);
public static ByteBuffer clearAndEnsureCapacity(ByteBuffer buffer, int elements) {
if (buffer == null || buffer.capacity() < elements) {
buffer = ByteBuffer.allocate(elements);
} else {
buffer.clear();
}
return buffer;
}

/**
* Ensure the char buffer's capacity. If a new buffer is allocated, its
* content is empty (the old buffer's contents is not copied).
* Ensure the buffer's capacity is large enough to hold a given number
* of elements. If the input buffer is not large enough, a new buffer is allocated
* and returned.
*
* @param capacity The required capacity of the buffer.
* @param elements The required number of elements to be appended to the buffer.
*
* @param buffer
* The buffer to check or <code>null</code> if a new buffer should be
* allocated.
*
* @return Returns the same buffer or a new buffer with the given capacity.
*/
public static CharBuffer ensureCapacity(CharBuffer buffer, int capacity) {
// TODO: GH-63
if (buffer == null || buffer.capacity() < capacity) {
buffer = CharBuffer.allocate(capacity);
public static CharBuffer clearAndEnsureCapacity(CharBuffer buffer, int elements) {
if (buffer == null || buffer.capacity() < elements) {
buffer = CharBuffer.allocate(elements);
} else {
buffer.clear();
}
return buffer;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,14 @@ public WordData next() {
throw new RuntimeException("Invalid dictionary " + "entry format (missing separator).");
}

inflectedBuffer.clear();
inflectedBuffer = BufferUtils.ensureCapacity(inflectedBuffer, sepPos);
inflectedBuffer = BufferUtils.clearAndEnsureCapacity(inflectedBuffer, sepPos);
inflectedBuffer.put(ba, 0, sepPos);
inflectedBuffer.flip();

inflectedCharBuffer = bytesToChars(inflectedBuffer, inflectedCharBuffer);
entry.update(inflectedBuffer, inflectedCharBuffer);

temp.clear();
temp = BufferUtils.ensureCapacity(temp, bbSize - sepPos);
temp = BufferUtils.clearAndEnsureCapacity(temp, bbSize - sepPos);
sepPos++;
temp.put(ba, sepPos, bbSize - sepPos);
temp.flip();
Expand All @@ -88,8 +86,7 @@ public WordData next() {
inflectedBuffer,
ByteBuffer.wrap(ba, 0, sepPos));
} else {
entry.stemBuffer = BufferUtils.ensureCapacity(entry.stemBuffer, sepPos);
entry.stemBuffer.clear();
entry.stemBuffer = BufferUtils.clearAndEnsureCapacity(entry.stemBuffer, sepPos);
entry.stemBuffer.put(ba, 0, sepPos);
entry.stemBuffer.flip();
}
Expand All @@ -102,8 +99,7 @@ public WordData next() {
/*
* Decode the tag data.
*/
entry.tagBuffer = BufferUtils.ensureCapacity(entry.tagBuffer, bbSize - sepPos);
entry.tagBuffer.clear();
entry.tagBuffer = BufferUtils.clearAndEnsureCapacity(entry.tagBuffer, bbSize - sepPos);
entry.tagBuffer.put(ba, sepPos, bbSize - sepPos);
entry.tagBuffer.flip();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,7 @@ public List<WordData> lookup(CharSequence word) {
formsList.wrap(forms, 0, 0);

// Encode word characters into bytes in the same encoding as the FSA's.
charBuffer.clear();
charBuffer = BufferUtils.ensureCapacity(charBuffer, word.length());
charBuffer = BufferUtils.clearAndEnsureCapacity(charBuffer, word.length());
for (int i = 0; i < word.length(); i++) {
char chr = word.charAt(i);
if (chr == separatorChar)
Expand Down Expand Up @@ -229,8 +228,7 @@ public List<WordData> lookup(CharSequence word) {
*/
final int tagSize = bbSize - sepPos;
if (tagSize > 0) {
wordData.tagBuffer = BufferUtils.ensureCapacity(wordData.tagBuffer, tagSize);
wordData.tagBuffer.clear();
wordData.tagBuffer = BufferUtils.clearAndEnsureCapacity(wordData.tagBuffer, tagSize);
wordData.tagBuffer.put(ba, sepPos, tagSize);
wordData.tagBuffer.flip();
}
Expand Down Expand Up @@ -278,8 +276,7 @@ public static String applyReplacements(CharSequence word, LinkedHashMap<String,
*/
private ByteBuffer charsToBytes(CharBuffer chars, ByteBuffer bytes) {
bytes.clear();
final int maxCapacity = (int) (chars.remaining() * encoder
.maxBytesPerChar());
final int maxCapacity = (int) (chars.remaining() * encoder.maxBytesPerChar());
if (bytes.capacity() <= maxCapacity) {
bytes = ByteBuffer.allocate(maxCapacity);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
public class NoEncoder implements ISequenceEncoder {
@Override
public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuffer target) {
reuse = BufferUtils.ensureCapacity(reuse, target.remaining());
reuse.clear();
reuse = BufferUtils.clearAndEnsureCapacity(reuse, target.remaining());

target.mark();
reuse.put(target)
Expand All @@ -21,8 +20,7 @@ public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuffer target)

@Override
public ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuffer encoded) {
reuse = BufferUtils.ensureCapacity(reuse, encoded.remaining());
reuse.clear();
reuse = BufferUtils.clearAndEnsureCapacity(reuse, encoded.remaining());

encoded.mark();
reuse.put(encoded)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuffer target)
// Compute temporary src with the infix removed.
// Concatenate in scratch space for simplicity.
final int len2 = source.remaining() - (i + j);
scratch = BufferUtils.ensureCapacity(scratch, i + len2);
scratch.clear();
scratch = BufferUtils.clearAndEnsureCapacity(scratch, i + len2);
scratch.put(source.array(), 0, i);
scratch.put(source.array(), i + j, len2);
scratch.flip();
Expand Down Expand Up @@ -99,8 +98,7 @@ public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuffer target)
}

final int len1 = target.remaining() - maxSubsequenceLength;
reuse = BufferUtils.ensureCapacity(reuse, 3 + len1);
reuse.clear();
reuse = BufferUtils.clearAndEnsureCapacity(reuse, 3 + len1);

reuse.put((byte) ((maxInfixIndex + 'A') & 0xFF));
reuse.put((byte) ((maxInfixLength + 'A') & 0xFF));
Expand Down Expand Up @@ -128,8 +126,7 @@ public ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuffer encoded

final int len1 = source.remaining() - (infixIndex + infixLength + truncateSuffixBytes);
final int len2 = encoded.remaining() - 3;
reuse = BufferUtils.ensureCapacity(reuse, infixIndex + len1 + len2);
reuse.clear();
reuse = BufferUtils.clearAndEnsureCapacity(reuse, infixIndex + len1 + len2);

assert encoded.hasArray() &&
encoded.position() == 0 &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuffer target)
}

final int len1 = target.remaining() - maxSubsequenceLength;
reuse = BufferUtils.ensureCapacity(reuse, 2 + len1);
reuse.clear();
reuse = BufferUtils.clearAndEnsureCapacity(reuse, 2 + len1);

assert target.hasArray() &&
target.position() == 0 &&
Expand Down Expand Up @@ -98,8 +97,7 @@ public ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuffer encoded

final int len1 = source.remaining() - (truncateSuffixBytes + truncatePrefixBytes);
final int len2 = encoded.remaining() - 2;
reuse = BufferUtils.ensureCapacity(reuse, len1 + len2);
reuse.clear();
reuse = BufferUtils.clearAndEnsureCapacity(reuse, len1 + len2);

reuse.put(source.array(), truncatePrefixBytes, len1);
reuse.put(encoded.array(), 2, len2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuffer target)
sharedPrefix = 0;
}

reuse = BufferUtils.ensureCapacity(reuse, 1 + target.remaining() - sharedPrefix);
reuse.clear();
reuse = BufferUtils.clearAndEnsureCapacity(reuse, 1 + target.remaining() - sharedPrefix);

assert target.hasArray() &&
target.position() == 0 &&
Expand All @@ -69,8 +68,7 @@ public ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuffer encoded
final int len1 = source.remaining() - truncateBytes;
final int len2 = encoded.remaining() - 1;

reuse = BufferUtils.ensureCapacity(reuse, len1 + len2);
reuse.clear();
reuse = BufferUtils.clearAndEnsureCapacity(reuse, len1 + len2);

assert source.hasArray() &&
source.position() == 0 &&
Expand Down
29 changes: 17 additions & 12 deletions morfologik-stemming/src/main/java/morfologik/stemming/WordData.java
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,11 @@ public final class WordData implements Cloneable {

/**
* Copy the stem's binary data (no charset decoding) to a custom byte
* buffer. If the buffer is null or not large enough to hold the result, a
* new buffer is allocated.
* buffer.
*
* The buffer is cleared prior to copying and flipped for reading
* upon returning from this method. If the buffer is null or not large
* enough to hold the result, a new buffer is allocated.
*
* @param target
* Target byte buffer to copy the stem buffer to or
Expand All @@ -101,8 +104,7 @@ public final class WordData implements Cloneable {
* @return Returns <code>target</code> or the new reallocated buffer.
*/
public ByteBuffer getStemBytes(ByteBuffer target) {
target = BufferUtils.ensureCapacity(target, stemBuffer.remaining());
assert target.position() == 0;
target = BufferUtils.clearAndEnsureCapacity(target, stemBuffer.remaining());
stemBuffer.mark();
target.put(stemBuffer);
stemBuffer.reset();
Expand All @@ -112,8 +114,10 @@ public ByteBuffer getStemBytes(ByteBuffer target) {

/**
* Copy the tag's binary data (no charset decoding) to a custom byte buffer.
* If the buffer is null or not large enough to hold the result, a new
* buffer is allocated.
*
* The buffer is cleared prior to copying and flipped for reading
* upon returning from this method. If the buffer is null or not large
* enough to hold the result, a new buffer is allocated.
*
* @param target
* Target byte buffer to copy the tag buffer to or
Expand All @@ -122,8 +126,7 @@ public ByteBuffer getStemBytes(ByteBuffer target) {
* @return Returns <code>target</code> or the new reallocated buffer.
*/
public ByteBuffer getTagBytes(ByteBuffer target) {
target = BufferUtils.ensureCapacity(target, tagBuffer.remaining());
assert target.position() == 0;
target = BufferUtils.clearAndEnsureCapacity(target, tagBuffer.remaining());
tagBuffer.mark();
target.put(tagBuffer);
tagBuffer.reset();
Expand All @@ -133,8 +136,11 @@ public ByteBuffer getTagBytes(ByteBuffer target) {

/**
* Copy the inflected word's binary data (no charset decoding) to a custom
* byte buffer. If the buffer is null or not large enough to hold the
* result, a new buffer is allocated.
* byte buffer.
*
* The buffer is cleared prior to copying and flipped for reading
* upon returning from this method. If the buffer is null or not large
* enough to hold the result, a new buffer is allocated.
*
* @param target
* Target byte buffer to copy the word buffer to or
Expand All @@ -143,8 +149,7 @@ public ByteBuffer getTagBytes(ByteBuffer target) {
* @return Returns <code>target</code> or the new reallocated buffer.
*/
public ByteBuffer getWordBytes(ByteBuffer target) {
target = BufferUtils.ensureCapacity(target, wordBuffer.remaining());
assert target.position() == 0;
target = BufferUtils.clearAndEnsureCapacity(target, wordBuffer.remaining());
wordBuffer.mark();
target.put(wordBuffer);
wordBuffer.reset();
Expand Down
12 changes: 4 additions & 8 deletions morfologik-tools/src/main/java/morfologik/tools/DictCompile.java
Original file line number Diff line number Diff line change
Expand Up @@ -148,29 +148,25 @@ public ExitStatus call() throws Exception {
sep2 = row.length;
}

source.clear();
source = BufferUtils.ensureCapacity(source, sep1);
source = BufferUtils.clearAndEnsureCapacity(source, sep1);
source.put(row, 0, sep1);
source.flip();

final int len = sep2 - (sep1 + 1);
target.clear();
target = BufferUtils.ensureCapacity(target, len);
target = BufferUtils.clearAndEnsureCapacity(target, len);
target.put(row, sep1 + 1, len);
target.flip();

final int len2 = row.length - (sep2 + 1);
tag.clear();
tag = BufferUtils.clearAndEnsureCapacity(tag, len2);
if (len2 > 0) {
tag = BufferUtils.ensureCapacity(tag, len2);
tag.put(row, sep2 + 1, len2);
}
tag.flip();

encoded = sequenceEncoder.encode(encoded, target, source);

assembled.clear();
assembled = BufferUtils.ensureCapacity(assembled,
assembled = BufferUtils.clearAndEnsureCapacity(assembled,
target.remaining() + 1 +
encoded.remaining() + 1 +
tag.remaining());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ public ExitStatus call() throws Exception {
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(output))) {
boolean hasTags = false;
for (WordData wd : lookup) {
tag.clear();
tag = wd.getTagBytes(tag);
if (tag.hasRemaining()) {
hasTags = true;
Expand All @@ -87,11 +86,8 @@ public ExitStatus call() throws Exception {
}

for (WordData wd : lookup) {
stem.clear();
stem = wd.getStemBytes(stem);
word.clear();
word = wd.getWordBytes(word);
tag.clear();
tag = wd.getTagBytes(tag);

write(os, stem);
Expand Down

0 comments on commit f9592c5

Please sign in to comment.