Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable record schema validation using v1 type system; CI uses MSG to start EC2 runners #20439

Merged
merged 10 commits into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/start-aws-runner/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ runs:
aws-region: us-east-2
- name: Start EC2 runner
id: start-ec2-runner
uses: airbytehq/ec2-github-runner@base64v1.1.0
uses: airbytehq/ec2-github-runner@MSGv0.0.2
with:
mode: start
github-token: ${{ inputs.github-token }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,19 @@
package io.airbyte.validation.json;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.networknt.schema.JsonMetaSchema;
import com.networknt.schema.JsonSchema;
import com.networknt.schema.JsonSchemaFactory;
import com.networknt.schema.SchemaValidatorsConfig;
import com.networknt.schema.SpecVersion;
import com.networknt.schema.ValidationContext;
import com.networknt.schema.ValidationMessage;
import io.airbyte.commons.string.Strings;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
Expand All @@ -25,13 +30,37 @@
public class JsonSchemaValidator {

private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaValidator.class);
// This URI just needs to point at any path in the same directory as /app/WellKnownTypes.json
// It's required for the JsonSchema#validate method to resolve $ref correctly.
private static final URI DEFAULT_BASE_URI;

static {
try {
DEFAULT_BASE_URI = new URI("file:///app/nonexistent_file.json");
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
}

private final SchemaValidatorsConfig schemaValidatorsConfig;
private final JsonSchemaFactory jsonSchemaFactory;
private final URI baseUri;

public JsonSchemaValidator() {
this.schemaValidatorsConfig = new SchemaValidatorsConfig();
this(DEFAULT_BASE_URI);
}

/**
 * Builds a validator that resolves {@code $ref} schemas relative to the supplied base URI.
 *
 * <p>The public no-argument constructor delegates here with a hardcoded URI that sits next to
 * WellKnownTypes.json; this constructor exists so that tests can point resolution somewhere else.
 *
 * @param baseUri The base URI for schema resolution
 */
@VisibleForTesting
protected JsonSchemaValidator(URI baseUri) {
  this.baseUri = baseUri;
  this.jsonSchemaFactory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7);
}

public Set<String> validate(final JsonNode schemaJson, final JsonNode objectJson) {
Expand Down Expand Up @@ -60,8 +89,38 @@ private Set<ValidationMessage> validateInternal(final JsonNode schemaJson, final
Preconditions.checkNotNull(schemaJson);
Preconditions.checkNotNull(objectJson);

return jsonSchemaFactory.getSchema(schemaJson, schemaValidatorsConfig)
.validate(objectJson);
// Default to draft-07, but have handling for the other metaschemas that networknt supports
JsonMetaSchema metaschema;
JsonNode metaschemaNode = schemaJson.get("$schema");
if (metaschemaNode == null || metaschemaNode.asText() == null || metaschemaNode.asText().isEmpty()) {
metaschema = JsonMetaSchema.getV7();
} else {
String metaschemaString = metaschemaNode.asText();
// We're not using "http://....".equals(), because we want to avoid weirdness with https, etc.
if (metaschemaString.contains("json-schema.org/draft-04")) {
metaschema = JsonMetaSchema.getV4();
} else if (metaschemaString.contains("json-schema.org/draft-06")) {
metaschema = JsonMetaSchema.getV6();
} else if (metaschemaString.contains("json-schema.org/draft/2019-09")) {
metaschema = JsonMetaSchema.getV201909();
} else if (metaschemaString.contains("json-schema.org/draft/2020-12")) {
metaschema = JsonMetaSchema.getV202012();
} else {
metaschema = JsonMetaSchema.getV7();
}
}

ValidationContext context = new ValidationContext(
jsonSchemaFactory.getUriFactory(),
null,
metaschema,
jsonSchemaFactory,
null);
JsonSchema schema = new JsonSchema(
context,
baseUri,
schemaJson);
return schema.validate(objectJson);
}

public boolean test(final JsonNode schemaJson, final JsonNode objectJson) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package io.airbyte.validation.json;

import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
Expand All @@ -15,7 +16,10 @@
import io.airbyte.commons.json.Jsons;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.util.Set;
import lombok.SneakyThrows;
import org.junit.jupiter.api.Test;

class JsonSchemaValidatorTest {
Expand Down Expand Up @@ -102,4 +106,39 @@ void test() throws IOException {
assertNull(JsonSchemaValidator.getSchema(schemaFile, "NonExistentObject"));
}

/**
 * Checks that relative {@code $ref} entries are resolved against the validator's base URI: a
 * WellKnownTypes.json file is written to a temp directory, the validator is pointed at a sibling
 * path in that directory, and a record with a wrongly-typed property must produce exactly one
 * validation message.
 */
@Test
void testResolveReferences() throws IOException {
  final String referencableSchemas = """
                                     {
                                       "definitions": {
                                         "ref1": {"type": "string"},
                                         "ref2": {"type": "boolean"}
                                       }
                                     }
                                     """;
  final File schemaFile = IOs.writeFile(Files.createTempDirectory("test"), "WellKnownTypes.json", referencableSchemas).toFile();
  // Derive the base URI from the Path instead of concatenating "file://" with the absolute path:
  // Path#toUri percent-encodes characters such as spaces and produces a valid file URI on every
  // platform. The sibling file itself does not need to exist.
  final URI baseUri = schemaFile.toPath().resolveSibling("foo.json").toUri();
  final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(baseUri);

  final Set<String> validationResult = jsonSchemaValidator.validate(
      Jsons.deserialize("""
                        {
                          "type": "object",
                          "properties": {
                            "prop1": {"$ref": "WellKnownTypes.json#/definitions/ref1"},
                            "prop2": {"$ref": "WellKnownTypes.json#/definitions/ref2"}
                          }
                        }
                        """),
      Jsons.deserialize("""
                        {
                          "prop1": "foo",
                          "prop2": "false"
                        }
                        """));

  // prop2 is the string "false", not a boolean, so only that property should be reported.
  assertEquals(Set.of("$.prop2: string found, boolean expected"), validationResult);
}

}
3 changes: 3 additions & 0 deletions airbyte-workers/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ WORKDIR /app
# Move worker app
ADD bin/${APPLICATION}-${VERSION}.tar /app

# Grab well-known types file
COPY WellKnownTypes.json /app

# wait for upstream dependencies to become available before starting server
ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-${VERSION}/bin/${APPLICATION}"]
15 changes: 15 additions & 0 deletions airbyte-workers/build.gradle
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import groovy.yaml.YamlSlurper
import groovy.json.JsonBuilder

plugins {
id 'application'
id 'airbyte-integration-test-java'
Expand Down Expand Up @@ -117,8 +120,20 @@ task cloudStorageIntegrationTest(type: Test) {
}
}

// Converts airbyte-protocol's well_known_types.yaml into build/docker/WellKnownTypes.json.
task generateWellKnownTypes() {
    doLast {
        // Read and parse the YAML definition that lives in the airbyte-protocol project.
        def yamlSource = project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml')
        def typeDefinitions = new YamlSlurper().parseText(yamlSource.getText())

        // Render as pretty-printed JSON before touching the filesystem.
        def renderedJson = new JsonBuilder(typeDefinitions).toPrettyString()

        // Write the result, creating the output directory first if it is missing.
        def jsonTarget = project.file("build/docker/WellKnownTypes.json")
        jsonTarget.parentFile.mkdirs()
        jsonTarget.setText(renderedJson)
    }
}

// Run the tarball copy and the WellKnownTypes.json generation before the Docker image is built.
tasks.named("buildDockerImage") {
    dependsOn copyGeneratedTar, generateWellKnownTypes
}

Task publishArtifactsTask = getPublishArtifactsTask("$rootProject.ext.version", project)
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ buildscript {
// The alternative is to import the openapi plugin for all modules.
// This might need to be updated when we change openapi plugin versions.
classpath 'com.fasterxml.jackson.core:jackson-core:2.13.0'

classpath 'org.codehaus.groovy:groovy-yaml:3.0.3'
}
}

Expand Down