-
Notifications
You must be signed in to change notification settings - Fork 324
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Ability to decode static metadata events #2495
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
* Copyright 2018-2023 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.service.models; | ||
|
||
import com.fasterxml.jackson.annotation.JsonTypeInfo; | ||
import com.fasterxml.jackson.annotation.JsonTypeInfo.As; | ||
import com.fasterxml.jackson.annotation.JsonTypeInfo.Id; | ||
import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver; | ||
|
||
/**
 * Common supertype for the event models, deserialized polymorphically by Jackson.
 *
 * <p>The concrete subtype is selected by {@link EventTypeResolver} from the value of the existing
 * {@code schemaURL} property. {@code visible = true} keeps that property available to the
 * deserializer, and {@link LineageEvent} is declared as the default implementation.
 */
@JsonTypeIdResolver(EventTypeResolver.class) | ||
@JsonTypeInfo( | ||
use = Id.CUSTOM, | ||
include = As.EXISTING_PROPERTY, | ||
property = "schemaURL", | ||
defaultImpl = LineageEvent.class, | ||
visible = true) | ||
public class BaseEvent extends BaseJsonModel {} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* | ||
* Copyright 2018-2023 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.service.models; | ||
|
||
import java.net.URI; | ||
import java.time.ZonedDateTime; | ||
import javax.validation.Valid; | ||
import javax.validation.constraints.NotNull; | ||
import lombok.AllArgsConstructor; | ||
import lombok.Builder; | ||
import lombok.Getter; | ||
import lombok.NoArgsConstructor; | ||
import lombok.Setter; | ||
import lombok.ToString; | ||
|
||
/**
 * Event model describing a single dataset.
 *
 * <p>Lombok generates the builder, the all-args and no-args constructors, the accessors and
 * {@code toString}. Bean Validation annotations mark which fields are required.
 */
@Builder | ||
@AllArgsConstructor | ||
@NoArgsConstructor | ||
@Setter | ||
@Getter | ||
@Valid | ||
@ToString | ||
public class DatasetEvent extends BaseEvent { | ||
// Time of the event; required.
@NotNull private ZonedDateTime eventTime; | ||
// Dataset payload; validated when present, but not marked @NotNull.
@Valid private LineageEvent.Dataset dataset; | ||
// Producer identifier; required.
@Valid @NotNull private String producer; | ||
// Schema URL; required. This is also the property BaseEvent uses as the type id.
@Valid @NotNull private URI schemaURL; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
/* | ||
* Copyright 2018-2023 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.service.models; | ||
|
||
import static marquez.service.models.EventTypeResolver.EventSchemaURL.LINEAGE_EVENT; | ||
|
||
import com.fasterxml.jackson.annotation.JsonTypeInfo.Id; | ||
import com.fasterxml.jackson.databind.DatabindContext; | ||
import com.fasterxml.jackson.databind.JavaType; | ||
import com.fasterxml.jackson.databind.jsontype.impl.TypeIdResolverBase; | ||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import lombok.AllArgsConstructor; | ||
import lombok.Getter; | ||
import lombok.extern.slf4j.Slf4j; | ||
|
||
@Slf4j | ||
public class EventTypeResolver extends TypeIdResolverBase { | ||
|
||
/**
 * Known event schema URLs, each paired with the model class that events of that schema
 * deserialize into.
 */
// NOTE(review): schemaURL and subType are assigned only through the generated constructor;
// they could likely be declared final — confirm nothing relies on them being mutable.
@AllArgsConstructor | ||
public enum EventSchemaURL { | ||
LINEAGE_EVENT( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be called RUN_EVENT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, let's use |
||
"https://openlineage.io/spec/2-0-0/OpenLineage.json#/definitions/RunEvent", | ||
LineageEvent.class), | ||
DATASET_EVENT( | ||
"https://openlineage.io/spec/2-0-0/OpenLineage.json#/definitions/DatasetEvent", | ||
DatasetEvent.class), | ||
JOB_EVENT( | ||
"https://openlineage.io/spec/2-0-0/OpenLineage.json#/definitions/JobEvent", JobEvent.class); | ||
|
||
// Full schema URL identifying the event type.
@Getter private String schemaURL; | ||
|
||
/**
 * Returns the trailing segment of {@link #schemaURL}, starting at its last {@code '/'}.
 *
 * <p>The slash itself is included (for example {@code "/RunEvent"}), because the substring
 * begins at the index of the slash rather than one past it.
 */
public String getName() { | ||
int lastSlash = schemaURL.lastIndexOf('/'); | ||
return schemaURL.substring(lastSlash, schemaURL.length()); | ||
} | ||
|
||
// Model class that events with this schema URL are deserialized into.
@Getter private Class<?> subType; | ||
} | ||
|
||
private JavaType superType; | ||
|
||
/**
 * Remembers the base type supplied at initialization; {@code typeFromId} later specializes it
 * to the resolved subtype.
 *
 * @param baseType the declared base type being resolved
 */
@Override | ||
public void init(JavaType baseType) { | ||
superType = baseType; | ||
} | ||
|
||
/**
 * Always returns {@code null}; this resolver does not derive a type id from a value.
 *
 * <p>NOTE(review): presumably acceptable because the type id lives in an existing property
 * ({@code As.EXISTING_PROPERTY}) and is not written separately — confirm.
 */
@Override | ||
public String idFromValue(Object value) { | ||
return null; | ||
} | ||
|
||
/**
 * Always returns {@code null}; neither the value nor the suggested type is used to produce a
 * type id.
 */
@Override | ||
public String idFromValueAndType(Object value, Class<?> suggestedType) { | ||
return null; | ||
} | ||
|
||
/**
 * Resolves the concrete event type for a type id (the {@code schemaURL} value).
 *
 * <p>The segment of {@code id} starting at its last {@code '/'} is compared with
 * {@link EventSchemaURL#getName()} of each known schema. If {@code id} is {@code null}, contains
 * no {@code '/'}, or matches no known schema, the {@code LINEAGE_EVENT} subtype is used.
 *
 * @param context databind context used to specialize the stored base type
 * @param id type id read from the payload; may be {@code null}
 * @return the base type specialized to the resolved subtype
 */
@Override | ||
public JavaType typeFromId(DatabindContext context, String id) throws IOException { | ||
// No type id at all: default to the run (lineage) event. The review discussion on this
// change attributes this default to backwards compatibility.
if (id == null) { | ||
return context.constructSpecializedType(superType, LINEAGE_EVENT.subType); | ||
} | ||
|
||
int lastSlash = id.lastIndexOf('/'); | ||
|
||
// An id without any '/' cannot name a known schema; default to the run event as well.
if (lastSlash < 0) { | ||
return context.constructSpecializedType(superType, LINEAGE_EVENT.subType); | ||
} | ||
Comment on lines
+63
to
+71
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add comment that we default to run event for backwards compatibility |
||
|
||
// Keep the leading '/' so the value lines up with EventSchemaURL.getName().
String type = id.substring(lastSlash, id.length()); | ||
|
||
// Only the trailing segment is compared, not the full URL; unknown names fall back to the
// run event.
Class<?> subType = | ||
Arrays.stream(EventSchemaURL.values()) | ||
.filter(s -> s.getName().equals(type)) | ||
.findAny() | ||
.map(EventSchemaURL::getSubType) | ||
.orElse(LINEAGE_EVENT.subType); | ||
|
||
return context.constructSpecializedType(superType, subType); | ||
} | ||
|
||
@Override | ||
public Id getMechanism() { | ||
return null; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/* | ||
* Copyright 2018-2023 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.service.models; | ||
|
||
import java.net.URI; | ||
import java.time.ZonedDateTime; | ||
import java.util.List; | ||
import javax.validation.Valid; | ||
import javax.validation.constraints.NotNull; | ||
import lombok.AllArgsConstructor; | ||
import lombok.Builder; | ||
import lombok.Getter; | ||
import lombok.NoArgsConstructor; | ||
import lombok.Setter; | ||
import lombok.ToString; | ||
|
||
/**
 * Event model describing a job together with its optional input and output datasets.
 *
 * <p>Lombok generates the builder, the all-args and no-args constructors, the accessors and
 * {@code toString}. Bean Validation annotations mark which fields are required.
 */
@Builder | ||
@AllArgsConstructor | ||
@NoArgsConstructor | ||
@Setter | ||
@Getter | ||
@Valid | ||
@ToString | ||
public class JobEvent extends BaseEvent { | ||
wslulciuc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Time of the event; required.
@NotNull private ZonedDateTime eventTime; | ||
// Job payload; required and validated.
@Valid @NotNull private LineageEvent.Job job; | ||
// Input datasets; validated when present, but not marked @NotNull.
@Valid private List<LineageEvent.Dataset> inputs; | ||
// Output datasets; validated when present, but not marked @NotNull.
@Valid private List<LineageEvent.Dataset> outputs; | ||
// Producer identifier; required.
@Valid @NotNull private String producer; | ||
// Schema URL; required. This is also the property BaseEvent uses as the type id.
@Valid @NotNull private URI schemaURL; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`200` status code is correct for the new OL events, and I feel we should also return a `200` when accepting OL run events (as outlined by the OL spec). The semantics should be: "Return `200 OK` to signify the OL event has been collected, and eventually will be processed." The `201` status code was never changed during the initial PoC phase of OL. More of a thought, and we'll want to have a follow-up PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The idea here was to distinguish between `RunEvent`s that get saved into the database (201 Created) and other event types that do not affect application state. In the end, once Marquez is capable of storing dataset and job events, it should return 201 in all cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we save them in the lineage_events table to start with?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@julienledem we'll want to write a proposal on how to handle `DatasetEvent`s and `JobEvent`s (see #2544). For now, let's ensure the event can be accepted (but not stored).