-
Notifications
You must be signed in to change notification settings - Fork 994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Send acknowledgment on Spec Update only after sinks are ready #822
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright 2018-2020 The Feast Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package feast.common.models; | ||
|
||
import java.io.Serializable; | ||
import lombok.AllArgsConstructor; | ||
import lombok.Data; | ||
|
||
/** | ||
* FeatureSetReference is key that uniquely defines specific version of FeatureSet or FeatureSetSpec | ||
*/ | ||
@Data | ||
@AllArgsConstructor | ||
public class FeatureSetReference implements Serializable { | ||
/* Name of project to which this featureSet is assigned */ | ||
private String projectName; | ||
/* Name of FeatureSet */ | ||
private String featureSetName; | ||
/* Version of FeatureSet */ | ||
private Integer version; | ||
pyalex marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Empty constructor required for Avro decoding. | ||
@SuppressWarnings("unused") | ||
public FeatureSetReference() {} | ||
|
||
public static FeatureSetReference of(String projectName, String featureSetName, Integer version) { | ||
return new FeatureSetReference(projectName, featureSetName, version); | ||
} | ||
|
||
public String getReference() { | ||
return String.format("%s/%s", getProjectName(), getFeatureSetName()); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,29 +17,38 @@ | |
package feast.ingestion.transform.specs; | ||
|
||
import com.google.auto.value.AutoValue; | ||
import feast.proto.core.FeatureSetProto; | ||
import feast.common.models.FeatureSetReference; | ||
import feast.proto.core.IngestionJobProto; | ||
import java.io.ByteArrayOutputStream; | ||
import java.io.IOException; | ||
import org.apache.beam.sdk.io.kafka.KafkaIO; | ||
import org.apache.beam.sdk.transforms.DoFn; | ||
import org.apache.beam.sdk.transforms.PTransform; | ||
import org.apache.beam.sdk.transforms.ParDo; | ||
import org.apache.beam.sdk.transforms.*; | ||
import org.apache.beam.sdk.transforms.windowing.AfterPane; | ||
import org.apache.beam.sdk.transforms.windowing.GlobalWindows; | ||
import org.apache.beam.sdk.transforms.windowing.Repeatedly; | ||
import org.apache.beam.sdk.transforms.windowing.Window; | ||
import org.apache.beam.sdk.values.KV; | ||
import org.apache.beam.sdk.values.PCollection; | ||
import org.apache.beam.sdk.values.PDone; | ||
import org.apache.kafka.common.serialization.ByteArraySerializer; | ||
import org.apache.kafka.common.serialization.StringSerializer; | ||
import org.joda.time.Duration; | ||
|
||
/** | ||
* Converts input {@link feast.proto.core.FeatureSetProto.FeatureSetSpec} into {@link | ||
 * Collects output from the sinks' prepareWrite steps (several streams flattened into one). As
 * soon as the count for a given FeatureSetReference reaches getSinksCount() — meaning enough
 * sinks have updated their state — an ack is pushed.
woop marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* | ||
* <p>Converts input {@link FeatureSetReference} into {@link | ||
* feast.proto.core.IngestionJobProto.FeatureSetSpecAck} message and writes it to kafka (ack-topic). | ||
*/ | ||
@AutoValue | ||
public abstract class WriteFeatureSetSpecAck | ||
extends PTransform<PCollection<KV<String, FeatureSetProto.FeatureSetSpec>>, PDone> { | ||
extends PTransform<PCollection<FeatureSetReference>, PDone> { | ||
public abstract IngestionJobProto.SpecsStreamingUpdateConfig getSpecsStreamingUpdateConfig(); | ||
|
||
public abstract Integer getSinksCount(); | ||
|
||
public static Builder newBuilder() { | ||
return new AutoValue_WriteFeatureSetSpecAck.Builder(); | ||
} | ||
|
@@ -49,12 +58,15 @@ public abstract static class Builder { | |
public abstract Builder setSpecsStreamingUpdateConfig( | ||
IngestionJobProto.SpecsStreamingUpdateConfig config); | ||
|
||
public abstract Builder setSinksCount(Integer count); | ||
|
||
public abstract WriteFeatureSetSpecAck build(); | ||
} | ||
|
||
@Override | ||
public PDone expand(PCollection<KV<String, FeatureSetProto.FeatureSetSpec>> input) { | ||
public PDone expand(PCollection<FeatureSetReference> input) { | ||
return input | ||
.apply("Prepare", new PrepareWrite(getSinksCount())) | ||
.apply("FeatureSetSpecToAckMessage", ParDo.of(new BuildAckMessage())) | ||
.apply( | ||
"ToKafka", | ||
|
@@ -66,20 +78,50 @@ public PDone expand(PCollection<KV<String, FeatureSetProto.FeatureSetSpec>> inpu | |
.withValueSerializer(ByteArraySerializer.class)); | ||
} | ||
|
||
private static class BuildAckMessage | ||
extends DoFn<KV<String, FeatureSetProto.FeatureSetSpec>, KV<String, byte[]>> { | ||
private static class BuildAckMessage extends DoFn<FeatureSetReference, KV<String, byte[]>> { | ||
@ProcessElement | ||
public void process(ProcessContext c) throws IOException { | ||
ByteArrayOutputStream encodedAck = new ByteArrayOutputStream(); | ||
|
||
IngestionJobProto.FeatureSetSpecAck.newBuilder() | ||
.setFeatureSetReference(c.element().getKey()) | ||
.setFeatureSetReference(c.element().getReference()) | ||
.setJobName(c.getPipelineOptions().getJobName()) | ||
.setFeatureSetVersion(c.element().getValue().getVersion()) | ||
.setFeatureSetVersion(c.element().getVersion()) | ||
.build() | ||
.writeTo(encodedAck); | ||
|
||
c.output(KV.of(c.element().getKey(), encodedAck.toByteArray())); | ||
c.output(KV.of(c.element().getReference(), encodedAck.toByteArray())); | ||
} | ||
} | ||
|
||
/** | ||
* Groups FeatureSetReference to generate ack only when amount of repeating elements reach | ||
* sinksCount | ||
*/ | ||
static class PrepareWrite | ||
extends PTransform<PCollection<FeatureSetReference>, PCollection<FeatureSetReference>> { | ||
private final Integer sinksCount; | ||
|
||
PrepareWrite(Integer sinksCount) { | ||
this.sinksCount = sinksCount; | ||
} | ||
|
||
@Override | ||
public PCollection<FeatureSetReference> expand(PCollection<FeatureSetReference> input) { | ||
return input | ||
.apply( | ||
"OnEveryElementTrigger", | ||
Window.<FeatureSetReference>into(new GlobalWindows()) | ||
.accumulatingFiredPanes() | ||
.triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))) | ||
.withAllowedLateness(Duration.ZERO)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Im wondering, what's the effect of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not quite sure, but Beam demands specified lateness There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oke |
||
.apply("CountingReadySinks", Count.perElement()) | ||
.apply( | ||
"WhenAllReady", | ||
Filter.by( | ||
(SerializableFunction<KV<FeatureSetReference, Long>, Boolean>) | ||
count -> count.getValue() >= sinksCount)) | ||
.apply(Keys.create()); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to make a decision on Lombok. Personally I am open to using it, but the team previously decided to phase it out and to use purely AutoValue. Apparently they are not compatible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, so I think AutoValue is more heavily used, since it's everywhere in Beam and I also applied it a lot in ingestion. So I changed to AutoValue. Unfortunately, AutoValue cannot be used in this specific case since Avro needs an empty constructor on the object, which is hard — if not impossible — to have on an AutoValue object. In general I like AutoValue more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, so let's continue to use Lombok for the time being. I hope this doesn't interfere with the rest of the code base.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But there is no need to use the AvroCoder, right (
@DefaultSchema(AutoValueSchema.class)
can be used for serializable autovalue types)? And even if you do want to use AvroCoder, in this case there are only 3 parameters to the object, the methods can be easily written out in code over using LombokThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@zhilingc you mean SerializableCoder? it's not deterministic and thus such class can't be used as Key in KV.
Regarding second point, I also need equality and hash to use it as key. And I'm not a fan of generating boilerplate code.
I've already produced several enormously huge PRs for the recent few weeks. Don't think it was easy to review them
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"as key" I meant for grouping operations of course
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, I guess @zhilingc is right. I could use
SchemaCoder
. Of course not through decorator@DefaultSchema()
since it's common module and we don't want to have dependency on beam here. But that's still feasible. I guess we can move towards AutoValue.