Skip to content

Commit

Permalink
[core] Refactor LookupFile.localFilePrefix to cut partition string to 20
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi committed Jul 31, 2024
1 parent 7c4b68e commit 26afa3e
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ public static Map<String, Object> convertSpecToInternal(
return partValues;
}

public static String paritionToString(
RowType partitionType, BinaryRow partition, String delimiter) {
public static String toSimpleString(
RowType partitionType, BinaryRow partition, String delimiter, int maxLength) {
InternalRow.FieldGetter[] getters = partitionType.fieldGetters();
StringBuilder builder = new StringBuilder();
for (int i = 0; i < getters.length; i++) {
Expand All @@ -94,6 +94,7 @@ public static String paritionToString(
builder.append(delimiter);
}
}
return builder.toString();
String result = builder.toString();
return result.substring(0, Math.min(result.length(), maxLength));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,23 @@ public class InternalRowPartitionComputerTest {
public void testPartitionToString() {
RowType rowType = RowType.of();
BinaryRow binaryRow = new BinaryRow(0);
BinaryRowWriter writer = new BinaryRowWriter(binaryRow);
assertThat(InternalRowPartitionComputer.paritionToString(rowType, binaryRow, "-"))
assertThat(InternalRowPartitionComputer.toSimpleString(rowType, binaryRow, "-", 30))
.isEqualTo("");

rowType = RowType.of(DataTypes.STRING(), DataTypes.INT());
binaryRow = new BinaryRow(2);
writer = new BinaryRowWriter(binaryRow);
BinaryRowWriter writer = new BinaryRowWriter(binaryRow);
writer.writeString(0, BinaryString.fromString("20240731"));
writer.writeInt(1, 10);
assertThat(InternalRowPartitionComputer.paritionToString(rowType, binaryRow, "-"))
assertThat(InternalRowPartitionComputer.toSimpleString(rowType, binaryRow, "-", 30))
.isEqualTo("20240731-10");

rowType = RowType.of(DataTypes.STRING(), DataTypes.INT());
binaryRow = new BinaryRow(2);
writer = new BinaryRowWriter(binaryRow);
writer.setNullAt(0);
writer.writeInt(1, 10);
assertThat(InternalRowPartitionComputer.paritionToString(rowType, binaryRow, "-"))
assertThat(InternalRowPartitionComputer.toSimpleString(rowType, binaryRow, "-", 30))
.isEqualTo("null-10");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@

package org.apache.paimon.mergetree;

import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.io.DataFileMeta;
import org.apache.paimon.lookup.LookupStoreReader;
import org.apache.paimon.options.MemorySize;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.FileIOUtils;

import org.apache.paimon.shade.caffeine2.com.github.benmanes.caffeine.cache.Cache;
Expand All @@ -37,6 +39,7 @@
import java.time.Duration;

import static org.apache.paimon.mergetree.LookupUtils.fileKibiBytes;
import static org.apache.paimon.utils.InternalRowPartitionComputer.toSimpleString;
import static org.apache.paimon.utils.Preconditions.checkArgument;

/** Lookup file for cache remote file to local. */
Expand Down Expand Up @@ -107,14 +110,12 @@ private static void removalCallback(String file, LookupFile lookupFile, RemovalC
}

public static String localFilePrefix(
String partition, int bucket, String remoteFileName, int length) {
String identifier;
if (partition.isEmpty()) {
identifier = String.format("%s-%s", bucket, remoteFileName);
RowType partitionType, BinaryRow partition, int bucket, String remoteFileName) {
if (partition.getFieldCount() == 0) {
return String.format("%s-%s", bucket, remoteFileName);
} else {
identifier = String.format("%s-%s-%s", partition, bucket, remoteFileName);
String partitionString = toSimpleString(partitionType, partition, "-", 20);
return String.format("%s-%s-%s", partitionString, bucket, remoteFileName);
}

return identifier.substring(0, Math.min(identifier.length(), length));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@
import org.apache.paimon.utils.CommitIncrement;
import org.apache.paimon.utils.FieldsComparator;
import org.apache.paimon.utils.FileStorePathFactory;
import org.apache.paimon.utils.InternalRowPartitionComputer;
import org.apache.paimon.utils.SnapshotManager;
import org.apache.paimon.utils.UserDefinedSeqComparator;

Expand All @@ -89,6 +88,7 @@
import static org.apache.paimon.CoreOptions.ChangelogProducer.FULL_COMPACTION;
import static org.apache.paimon.CoreOptions.MergeEngine.DEDUPLICATE;
import static org.apache.paimon.lookup.LookupStoreFactory.bfGenerator;
import static org.apache.paimon.mergetree.LookupFile.localFilePrefix;

/** {@link FileStoreWrite} for {@link KeyValueFileStore}. */
public class KeyValueFileStoreWrite extends MemoryFileStoreWrite<KeyValue> {
Expand Down Expand Up @@ -381,12 +381,7 @@ private <T> LookupLevels<T> createLookupLevels(
file ->
ioManager
.createChannel(
LookupFile.localFilePrefix(
InternalRowPartitionComputer.paritionToString(
partitionType, partition, "-"),
bucket,
file,
100))
localFilePrefix(partitionType, partition, bucket, file))
.getPathFile(),
lookupStoreFactory,
bfGenerator(options),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
import org.apache.paimon.options.Options;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.InternalRowPartitionComputer;
import org.apache.paimon.utils.KeyComparatorSupplier;
import org.apache.paimon.utils.Preconditions;

Expand All @@ -56,6 +55,7 @@

import static org.apache.paimon.CoreOptions.MergeEngine.DEDUPLICATE;
import static org.apache.paimon.lookup.LookupStoreFactory.bfGenerator;
import static org.apache.paimon.mergetree.LookupFile.localFilePrefix;

/** Implementation for {@link TableQuery} for caching data and file in local. */
public class LocalTableQuery implements TableQuery {
Expand Down Expand Up @@ -162,15 +162,8 @@ private void newLookupLevels(BinaryRow partition, int bucket, List<DataFileMeta>
file ->
Preconditions.checkNotNull(ioManager, "IOManager is required.")
.createChannel(
LookupFile.localFilePrefix(
InternalRowPartitionComputer
.paritionToString(
partitionType,
partition,
"-"),
bucket,
file,
100))
localFilePrefix(
partitionType, partition, bucket, file))
.getPathFile(),
lookupStoreFactory,
bfGenerator(options),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,43 @@

package org.apache.paimon.mergetree;

import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;

import org.junit.jupiter.api.Test;

import static org.apache.paimon.data.BinaryRow.EMPTY_ROW;
import static org.apache.paimon.data.BinaryRow.singleColumn;
import static org.apache.paimon.mergetree.LookupFile.localFilePrefix;
import static org.assertj.core.api.Assertions.assertThat;

/** Test for {@link LookupFile}. */
public class LookupFileTest {

@Test
public void testLocalFilePrefix() {
RowType partType = RowType.of(DataTypes.STRING());
assertThat(
LookupFile.localFilePrefix(
"2024073105",
localFilePrefix(
partType,
singleColumn("2024073105"),
10,
"data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc",
100))
"data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc"))
.isEqualTo("2024073105-10-data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc");
assertThat(
LookupFile.localFilePrefix(
"", 10, "data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc", 100))
localFilePrefix(
RowType.of(),
EMPTY_ROW,
10,
"data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc"))
.isEqualTo("10-data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc");
assertThat(
LookupFile.localFilePrefix(
"2024073105",
localFilePrefix(
partType,
singleColumn("2024073105-05-2123232313123123123213123"),
10,
"data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc",
20))
.isEqualTo("2024073105-10-data-c");
"data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc"))
.isEqualTo(
"2024073105-05-212323-10-data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc");
}
}

0 comments on commit 26afa3e

Please sign in to comment.