Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
package org.apache.spark.unsafe.types;

import javax.annotation.Nonnull;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.io.*;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.Map;
Expand All @@ -38,12 +37,13 @@
* <p>
* Note: This is not designed for general use cases, should not be used outside SQL.
*/
public final class UTF8String implements Comparable<UTF8String>, Serializable {
public final class UTF8String implements Comparable<UTF8String>, Externalizable {

// These are only updated by readExternal()
@Nonnull
private final Object base;
private final long offset;
private final int numBytes;
private Object base;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to still keep them final?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See if you can use Externalizable instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Externalizable still needs them to be non-final. Should we use UNSAFE here? I'm not sure about the performance difference between final and non-final fields.

private long offset;
private int numBytes;

/** Returns the {@code base} object reference held by this string. */
public Object getBaseObject() {
  return base;
}
/** Returns the {@code offset} value held by this string. */
public long getBaseOffset() {
  return offset;
}
Expand Down Expand Up @@ -127,6 +127,11 @@ protected UTF8String(Object base, long offset, int numBytes) {
this.numBytes = numBytes;
}

/**
 * Public no-argument constructor used for serialization. The Externalizable contract requires
 * one so that an instance can be created before {@code readExternal} fills in its fields.
 *
 * NOTE(review): this passes {@code null} for {@code base}, which is declared {@code @Nonnull};
 * presumably the instance is never used before {@code readExternal} has run -- confirm.
 */
public UTF8String() {
this(null, 0, 0);
}

/**
* Writes the content of this string into a memory address, identified by an object and an offset.
* The target memory address must already be allocated, and have enough space to hold all the
Expand Down Expand Up @@ -978,4 +983,18 @@ public UTF8String soundex() {
}
return UTF8String.fromBytes(sx);
}

/**
 * Writes this string to {@code out} as a 4-byte length (via {@code writeInt}) followed by
 * exactly that many bytes, taken from {@code getBytes()}.
 *
 * This is the format that {@link #readExternal(ObjectInput)} reads back.
 *
 * @param out the stream to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void writeExternal(ObjectOutput out) throws IOException {
  byte[] bytes = getBytes();
  out.writeInt(bytes.length);
  out.write(bytes);
}

/**
 * Restores this string from the format produced by {@link #writeExternal(ObjectOutput)}:
 * a 4-byte length followed by that many bytes of content.
 *
 * The content is read into a fresh byte array, which becomes the new {@code base};
 * {@code offset} is set to {@code BYTE_ARRAY_OFFSET} and {@code numBytes} to the length read.
 *
 * @param in the stream to read from
 * @throws IOException if the stored length is negative, the stream ends before all bytes
 *         are read, or reading from {@code in} fails
 * @throws ClassNotFoundException declared by the Externalizable contract; not thrown here
 */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
  int length = in.readInt();
  if (length < 0) {
    // A corrupt stream would otherwise surface as an unchecked NegativeArraySizeException.
    throw new IOException("Negative UTF8String length: " + length);
  }
  byte[] bytes = new byte[length];
  in.readFully(bytes);
  // Assign the fields only after the payload has been read completely, so a truncated
  // stream does not leave this object pointing at a partially filled buffer.
  base = bytes;
  offset = BYTE_ARRAY_OFFSET;
  numBytes = length;
}

}