Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.exceptions;

import com.google.errorprone.annotations.FormatMethod;

/** Exception raised when an entity is not found. */
public class EntityNotFoundException extends RESTException implements CleanableFailure {
  /**
   * Creates an exception with a formatted message.
   *
   * @param message a {@link String#format(String, Object...)}-style message template
   * @param args arguments referenced by the format specifiers in {@code message}
   */
  @FormatMethod
  public EntityNotFoundException(String message, Object... args) {
    super(message, args);
  }

  /**
   * Creates an exception with a formatted message and an underlying cause.
   *
   * @param cause the underlying exception that triggered this failure
   * @param message a {@link String#format(String, Object...)}-style message template
   * @param args arguments referenced by the format specifiers in {@code message}
   */
  @FormatMethod
  public EntityNotFoundException(Throwable cause, String message, Object... args) {
    super(cause, message, args);
  }
}
5 changes: 5 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,11 @@ public StructLike partition() {
return partitionData;
}

public void setPartitionData(PartitionData partitionData) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is necessary, we should avoid it.

Copy link
Contributor

@amogh-jahagirdar amogh-jahagirdar Oct 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. @rahil-c could we just reuse the existing constructor for GenericDataFile and GenericDeleteFile and copy over everything correctly with the new partition data during the binding? We should avoid exposing unnecessary public APIs when possibke

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah BaseFile isn't public itself but still, think we should just avoid the extra method since it's possible to do everything via the constructor.

// TODO for binding in REST scan
this.partitionData = partitionData;
}

@Override
public long recordCount() {
return recordCount;
Expand Down
127 changes: 114 additions & 13 deletions core/src/main/java/org/apache/iceberg/ContentFileParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.util.JsonUtil;

class ContentFileParser {
public class ContentFileParser {
private static final String SPEC_ID = "spec-id";
private static final String CONTENT = "content";
private static final String FILE_PATH = "file-path";
Expand All @@ -48,6 +48,97 @@ class ContentFileParser {

// utility class: static parse/serialize helpers only, never instantiated
private ContentFileParser() {}

/**
 * Serializes a content file to JSON without resolving it against a bound partition spec's
 * schema on the read side ("unbound" form: the partition value is written as a raw single
 * value).
 *
 * <p>Field order below is the wire format; do not reorder the write calls.
 *
 * @param contentFile the data or delete file to serialize; must not be null
 * @param spec the partition spec the file belongs to; its spec ID must match the file's
 * @param generator the JSON generator to write to; must not be null
 * @throws IOException if the generator fails to write
 */
public static void unboundContentFileToJson(
ContentFile<?> contentFile, PartitionSpec spec, JsonGenerator generator) throws IOException {
Preconditions.checkArgument(contentFile != null, "Invalid content file: null");
Preconditions.checkArgument(spec != null, "Invalid partition spec: null");
Preconditions.checkArgument(generator != null, "Invalid JSON generator: null");
// the file must have been written under this spec, otherwise the partition value
// cannot be interpreted with spec.partitionType() below
Preconditions.checkArgument(
contentFile.specId() == spec.specId(),
"Invalid partition spec id from content file: expected = %s, actual = %s",
spec.specId(),
contentFile.specId());

generator.writeStartObject();
// ignore the ordinal position (ContentFile#pos) of the file in a manifest,
// as it isn't used and BaseFile constructor doesn't support it.

generator.writeNumberField(SPEC_ID, contentFile.specId());
generator.writeStringField(CONTENT, contentFile.content().name());
generator.writeStringField(FILE_PATH, contentFile.path().toString());
generator.writeStringField(FILE_FORMAT, contentFile.format().name());

// optional: unpartitioned tables have no partition value
if (contentFile.partition() != null) {
generator.writeFieldName(PARTITION);
SingleValueParser.toJson(spec.partitionType(), contentFile.partition(), generator);
}

generator.writeNumberField(FILE_SIZE, contentFile.fileSizeInBytes());

// record count, column sizes, value counts, bounds, etc.
metricsToJson(contentFile, generator);

// optional: encryption key metadata
if (contentFile.keyMetadata() != null) {
generator.writeFieldName(KEY_METADATA);
SingleValueParser.toJson(DataFile.KEY_METADATA.type(), contentFile.keyMetadata(), generator);
}

if (contentFile.splitOffsets() != null) {
JsonUtil.writeLongArray(SPLIT_OFFSETS, contentFile.splitOffsets(), generator);
}

// optional: only present for equality delete files
if (contentFile.equalityFieldIds() != null) {
JsonUtil.writeIntegerArray(EQUALITY_IDS, contentFile.equalityFieldIds(), generator);
}

if (contentFile.sortOrderId() != null) {
generator.writeNumberField(SORT_ORDER_ID, contentFile.sortOrderId());
}

generator.writeEndObject();
}

/**
 * Parses a content file from JSON without binding the partition value to a spec; the raw
 * partition JSON node is carried through so it can be bound later.
 *
 * @param jsonNode the JSON object produced by {@code unboundContentFileToJson}; must not be null
 * @return an {@code UnboundGenericDataFile} for data content, otherwise an
 *     {@code UnboundGenericDeleteFile}
 */
public static ContentFile<?> unboundContentFileFromJson(JsonNode jsonNode) {
  Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for content file: null");

  int partitionSpecId = JsonUtil.getInt(SPEC_ID, jsonNode);
  FileContent content = FileContent.valueOf(JsonUtil.getString(CONTENT, jsonNode));
  String location = JsonUtil.getString(FILE_PATH, jsonNode);
  FileFormat format = FileFormat.fromString(JsonUtil.getString(FILE_FORMAT, jsonNode));

  long sizeInBytes = JsonUtil.getLong(FILE_SIZE, jsonNode);
  Metrics metrics = metricsFromJson(jsonNode);
  ByteBuffer keyMetadata = JsonUtil.getByteBufferOrNull(KEY_METADATA, jsonNode);
  List<Long> splitOffsets = JsonUtil.getLongListOrNull(SPLIT_OFFSETS, jsonNode);
  int[] equalityFieldIds = JsonUtil.getIntArrayOrNull(EQUALITY_IDS, jsonNode);
  Integer sortOrderId = JsonUtil.getIntOrNull(SORT_ORDER_ID, jsonNode);

  // delete files (position and equality) carry extra fields; everything else is data content
  if (content != FileContent.DATA) {
    return new UnboundGenericDeleteFile(
        partitionSpecId,
        content,
        location,
        format,
        jsonNode.get(PARTITION),
        sizeInBytes,
        metrics,
        equalityFieldIds,
        sortOrderId,
        splitOffsets,
        keyMetadata);
  }

  return new UnboundGenericDataFile(
      partitionSpecId,
      location,
      format,
      jsonNode.get(PARTITION),
      sizeInBytes,
      metrics,
      keyMetadata,
      splitOffsets,
      sortOrderId);
}

// true only when partition data is present AND has at least one field
private static boolean hasPartitionData(StructLike partitionData) {
  if (partitionData == null) {
    return false;
  }

  return partitionData.size() > 0;
}
Expand Down Expand Up @@ -125,18 +216,7 @@ static ContentFile<?> fromJson(JsonNode jsonNode, PartitionSpec spec) {

PartitionData partitionData = null;
if (jsonNode.has(PARTITION)) {
partitionData = new PartitionData(spec.partitionType());
StructLike structLike =
(StructLike) SingleValueParser.fromJson(spec.partitionType(), jsonNode.get(PARTITION));
Preconditions.checkState(
partitionData.size() == structLike.size(),
"Invalid partition data size: expected = %s, actual = %s",
partitionData.size(),
structLike.size());
for (int pos = 0; pos < partitionData.size(); ++pos) {
Class<?> javaClass = spec.partitionType().fields().get(pos).type().typeId().javaClass();
partitionData.set(pos, structLike.get(pos, javaClass));
}
partitionData = partitionDataFromRawValue(jsonNode.get(PARTITION), spec);
}

long fileSizeInBytes = JsonUtil.getLong(FILE_SIZE, jsonNode);
Expand Down Expand Up @@ -173,6 +253,27 @@ static ContentFile<?> fromJson(JsonNode jsonNode, PartitionSpec spec) {
}
}

/**
 * Binds a raw partition JSON value to the given spec, producing typed partition data.
 *
 * @param rawPartitionValue the partition value as written by SingleValueParser; may be null
 * @param spec the partition spec whose partition type describes the value's fields
 * @return partition data populated from the raw value, or null when no value was given
 */
static PartitionData partitionDataFromRawValue(JsonNode rawPartitionValue, PartitionSpec spec) {
  if (rawPartitionValue == null) {
    return null;
  }

  PartitionData boundData = new PartitionData(spec.partitionType());
  StructLike parsedStruct =
      (StructLike) SingleValueParser.fromJson(spec.partitionType(), rawPartitionValue);
  // the parsed struct must line up field-for-field with the spec's partition type
  Preconditions.checkState(
      boundData.size() == parsedStruct.size(),
      "Invalid partition data size: expected = %s, actual = %s",
      boundData.size(),
      parsedStruct.size());

  // copy each field using the Java class that matches its declared partition type
  for (int i = 0; i < boundData.size(); i++) {
    Class<?> fieldClass = spec.partitionType().fields().get(i).type().typeId().javaClass();
    boundData.set(i, parsedStruct.get(i, fieldClass));
  }

  return boundData;
}

private static void metricsToJson(ContentFile<?> contentFile, JsonGenerator generator)
throws IOException {
generator.writeNumberField(RECORD_COUNT, contentFile.recordCount());
Expand Down
89 changes: 89 additions & 0 deletions core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.ExpressionParser;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.util.JsonUtil;

/** Serializes {@link FileScanTask}s for REST scan planning responses. */
public class RESTFileScanTaskParser {
  private static final String DATA_FILE = "data-file";
  private static final String DELETE_FILE_REFERENCES = "delete-file-references";
  private static final String RESIDUAL = "residual-filter";

  // utility class: static parse/serialize helpers only, never instantiated
  private RESTFileScanTaskParser() {}

  /**
   * Serializes a file scan task to JSON.
   *
   * @param fileScanTask the task to serialize; must not be null
   * @param deleteFileReferences indexes into a shared delete-file list; may be null when the task
   *     has no deletes
   * @param partitionSpec the spec the task's data file was written under
   * @param generator the JSON generator to write to; must not be null
   * @throws IOException if the generator fails to write
   */
  public static void toJson(
      FileScanTask fileScanTask,
      Set<Integer> deleteFileReferences,
      PartitionSpec partitionSpec,
      JsonGenerator generator)
      throws IOException {
    Preconditions.checkArgument(fileScanTask != null, "Invalid file scan task: null");
    Preconditions.checkArgument(generator != null, "Invalid JSON generator: null");

    generator.writeStartObject();

    generator.writeFieldName(DATA_FILE);
    ContentFileParser.unboundContentFileToJson(fileScanTask.file(), partitionSpec, generator);

    if (deleteFileReferences != null) {
      JsonUtil.writeIntegerArray(DELETE_FILE_REFERENCES, deleteFileReferences, generator);
    }

    if (fileScanTask.residual() != null) {
      generator.writeFieldName(RESIDUAL);
      ExpressionParser.toJson(fileScanTask.residual(), generator);
    }

    generator.writeEndObject();
  }

  /**
   * Parses a file scan task from JSON, resolving delete-file references against the shared
   * delete-file list that accompanied the planning response.
   *
   * @param jsonNode the JSON object produced by {@link #toJson}; must be a non-null object node
   * @param allDeleteFiles the shared delete-file list the task's references index into
   * @return an unbound file scan task; partition data is bound later against the spec
   */
  public static FileScanTask fromJson(JsonNode jsonNode, List<DeleteFile> allDeleteFiles) {
    Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for file scan task: null");
    Preconditions.checkArgument(
        jsonNode.isObject(), "Invalid JSON node for file scan task: non-object (%s)", jsonNode);

    UnboundGenericDataFile dataFile =
        (UnboundGenericDataFile)
            ContentFileParser.unboundContentFileFromJson(JsonUtil.get(DATA_FILE, jsonNode));

    UnboundGenericDeleteFile[] deleteFiles = null;
    // LinkedHashSet dedups the references while keeping their JSON order, so the resolved
    // delete-file array is deterministic (a plain HashSet scrambles iteration order)
    Set<Integer> deleteFileReferences = Sets.newLinkedHashSet();
    if (jsonNode.has(DELETE_FILE_REFERENCES)) {
      deleteFileReferences.addAll(JsonUtil.getIntegerList(DELETE_FILE_REFERENCES, jsonNode));
      ImmutableList.Builder<UnboundGenericDeleteFile> builder = ImmutableList.builder();
      deleteFileReferences.forEach(
          refIndex -> builder.add((UnboundGenericDeleteFile) allDeleteFiles.get(refIndex)));
      deleteFiles = builder.build().toArray(new UnboundGenericDeleteFile[0]);
    }

    Expression filter = null;
    if (jsonNode.has(RESIDUAL)) {
      filter = ExpressionParser.fromJson(jsonNode.get(RESIDUAL));
    }

    return new UnboundBaseFileScanTask(dataFile, deleteFiles, filter);
  }
}
47 changes: 47 additions & 0 deletions core/src/main/java/org/apache/iceberg/RESTPlanningMode.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import java.util.Locale;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;

public enum RESTPlanningMode {
REQUIRED("required"),
SUPPORTED("supported"),
UNSUPPORTED("unsupported");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still going through the proposed spec change but I'm skeptical do we really need an explicit unsupported? Isn't the lack of presence of supported or required in the config map enough for the client to make a decision?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think thats also fair if we need to have an additional unsupported flag, i think not having it also signals the same thing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will let others chime in here though before making a revision.

Copy link
Contributor

@amogh-jahagirdar amogh-jahagirdar Oct 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. After some more thought, my 2c is that I think I'd rather us try and get the model + client side changes into 1.7 rather than expand the scope with this aspect since it's quite useful without these things defined. I'd rather not have client side changes depend on another spec change decision.

For now, I think keeping it simple with a catalog client side property for controlling if server side planning is performed is the way forward.

Once the model and client side changes are in, I think it'd make total sense to revisit the spec changes you mentioned and these specific planning mode changes.

cc @rdblue @danielcweeks for their thoughts.

private final String planningMode;

RESTPlanningMode(String planningMode) {
this.planningMode = planningMode;
}

public String mode() {
return planningMode;
}

public static RESTPlanningMode fromName(String planningMode) {
Preconditions.checkArgument(planningMode != null, "planningMode is null");
try {
return RESTPlanningMode.valueOf(planningMode.toUpperCase(Locale.ENGLISH));
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
String.format("Invalid planningMode name: %s", planningMode), e);
}
}
}
Loading