Skip to content

Commit

Permalink
Add --incompatible_enforce_starlark_utf8
Browse files Browse the repository at this point in the history
If enabled (or set to `error`), fail if Starlark files are not UTF-8 encoded. If set to `warning` (the default), emits a warning instead.

Bazel already assumes that Starlark files are UTF-8 encoded for e.g. filenames in actions executed remotely. This flag doesn't affect this, it only makes encoding failures more visible.

Work towards #374

Closes #24944.

PiperOrigin-RevId: 721513249
Change-Id: I1d3363168c6cd5d37abf96e0401e34866b6679d7
  • Loading branch information
fmeum authored and copybara-github committed Jan 30, 2025
1 parent c07a48e commit e7934ce
Show file tree
Hide file tree
Showing 15 changed files with 334 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/packages/semantics",
"//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input",
"//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions",
"//src/main/java/com/google/devtools/build/lib/skyframe:starlark_util",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization:visible-for-serialization",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:serialization-constant",
Expand Down Expand Up @@ -251,6 +252,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/cmdline",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/packages",
"//src/main/java/com/google/devtools/build/lib/packages/semantics",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input",
"//src/main/java/com/google/devtools/build/lib/rules:repository/repository_directory_value",
Expand All @@ -260,6 +262,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function",
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:starlark_util",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.packages.BazelStarlarkEnvironment;
import com.google.devtools.build.lib.packages.DotBazelFileSyntaxChecker;
import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions;
import com.google.devtools.build.lib.server.FailureDetails.ExternalDeps.Code;
import com.google.devtools.build.lib.skyframe.StarlarkUtil;
import net.starlark.java.eval.EvalException;
import net.starlark.java.eval.Module;
import net.starlark.java.eval.Starlark;
Expand Down Expand Up @@ -62,9 +64,21 @@ public static CompiledModuleFile parseAndCompile(
BazelStarlarkEnvironment starlarkEnv,
ExtendedEventHandler eventHandler)
throws ExternalDepsException {
StarlarkFile starlarkFile =
StarlarkFile.parse(
ParserInput.fromLatin1(moduleFile.getContent(), moduleFile.getLocation()));
ParserInput parserInput;
try {
parserInput =
StarlarkUtil.createParserInput(
moduleFile.getContent(),
moduleFile.getLocation(),
starlarkSemantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8),
eventHandler);
} catch (
@SuppressWarnings("UnusedException") // createParserInput() reports its own error message
StarlarkUtil.InvalidUtf8Exception e) {
throw ExternalDepsException.withMessage(
Code.BAD_MODULE, "error reading MODULE.bazel file for %s", moduleKey);
}
StarlarkFile starlarkFile = StarlarkFile.parse(parserInput);
if (!starlarkFile.ok()) {
Event.replayEventsOn(eventHandler, starlarkFile.errors());
throw ExternalDepsException.withMessage(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
import com.google.devtools.build.lib.packages.BazelStarlarkEnvironment;
import com.google.devtools.build.lib.packages.DotBazelFileSyntaxChecker;
import com.google.devtools.build.lib.packages.VendorThreadContext;
import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions;
import com.google.devtools.build.lib.rules.repository.RepositoryDelegatorFunction;
import com.google.devtools.build.lib.skyframe.PrecomputedValue;
import com.google.devtools.build.lib.skyframe.StarlarkUtil;
import com.google.devtools.build.lib.util.StringEncoding;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
Expand Down Expand Up @@ -115,7 +117,7 @@ private VendorThreadContext getVendorFileContext(
Environment env, SkyKey skyKey, Path vendorFilePath, StarlarkSemantics starlarkSemantics)
throws VendorFileFunctionException, InterruptedException {
try (Mutability mu = Mutability.create("vendor file")) {
StarlarkFile vendorFile = readAndParseVendorFile(vendorFilePath, env);
StarlarkFile vendorFile = readAndParseVendorFile(vendorFilePath, env, starlarkSemantics);
new DotBazelFileSyntaxChecker("VENDOR.bazel files", /* canLoadBzl= */ false)
.check(vendorFile);
net.starlark.java.eval.Module predeclaredEnv =
Expand Down Expand Up @@ -147,7 +149,8 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath)
}
}

private static StarlarkFile readAndParseVendorFile(Path path, Environment env)
private static StarlarkFile readAndParseVendorFile(
Path path, Environment env, StarlarkSemantics starlarkSemantics)
throws VendorFileFunctionException {
byte[] contents;
try {
Expand All @@ -156,8 +159,21 @@ private static StarlarkFile readAndParseVendorFile(Path path, Environment env)
throw new VendorFileFunctionException(
new IOException("error reading VENDOR.bazel file", e), Transience.TRANSIENT);
}
StarlarkFile starlarkFile =
StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString()));
ParserInput parserInput;
try {
parserInput =
StarlarkUtil.createParserInput(
contents,
path.getPathString(),
starlarkSemantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8),
env.getListener());
} catch (
@SuppressWarnings("UnusedException") // createParserInput() reports its own error message
StarlarkUtil.InvalidUtf8Exception e) {
throw new VendorFileFunctionException(
new BadVendorFileException("error reading VENDOR.bazel file"), Transience.PERSISTENT);
}
StarlarkFile starlarkFile = StarlarkFile.parse(parserInput);
if (!starlarkFile.ok()) {
Event.replayEventsOn(env.getListener(), starlarkFile.errors());
throw new VendorFileFunctionException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Interner;
import com.google.devtools.build.lib.concurrent.BlazeInterners;
import com.google.devtools.common.options.BoolOrEnumConverter;
import com.google.devtools.common.options.Converters.CommaSeparatedNonEmptyOptionListConverter;
import com.google.devtools.common.options.Converters.CommaSeparatedOptionListConverter;
import com.google.devtools.common.options.Converters.CommaSeparatedOptionSetConverter;
Expand Down Expand Up @@ -769,6 +770,38 @@ public final class BuildLanguageOptions extends OptionsBase {
+ " number of files is not 1.")
public boolean incompatibleLocationsPrefersExecutable;

/** An enum for specifying different modes for UTF-8 checking of Starlark files. */
public enum Utf8EnforcementMode {
OFF,
WARNING,
ERROR;

/** Converts to {@link Utf8EnforcementMode}. */
public static class Converter extends BoolOrEnumConverter<Utf8EnforcementMode> {
public Converter() {
super(
Utf8EnforcementMode.class,
"UTF-8 enforcement mode",
Utf8EnforcementMode.ERROR,
Utf8EnforcementMode.OFF);
}
}
}

@Option(
name = "incompatible_enforce_starlark_utf8",
defaultValue = "warning",
converter = Utf8EnforcementMode.Converter.class,
documentationCategory = OptionDocumentationCategory.INPUT_STRICTNESS,
effectTags = {OptionEffectTag.LOADING_AND_ANALYSIS},
metadataTags = {OptionMetadataTag.INCOMPATIBLE_CHANGE},
help =
"If enabled (or set to 'error'), fail if Starlark files are not UTF-8 encoded. If set to"
+ " 'warning', emit a warning instead. If set to 'off', Bazel assumes that Starlark"
+ " files are UTF-8 encoded but does not verify this assumption. Note that Starlark"
+ " files which are not UTF-8 encoded can cause Bazel to behave inconsistently.")
public Utf8EnforcementMode incompatibleEnforceStarlarkUtf8;

/**
* An interner to reduce the number of StarlarkSemantics instances. A single Blaze instance should
* never accumulate a large number of these and being able to shortcut on object identity makes a
Expand Down Expand Up @@ -835,6 +868,7 @@ public StarlarkSemantics toStarlarkSemantics() {
.setBool(StarlarkSemantics.PRINT_TEST_MARKER, internalStarlarkFlagTestCanary)
.setBool(
INCOMPATIBLE_DO_NOT_SPLIT_LINKING_CMDLINE, incompatibleDoNotSplitLinkingCmdline)
.set(INCOMPATIBLE_ENFORCE_STARLARK_UTF8, incompatibleEnforceStarlarkUtf8)
.setBool(
INCOMPATIBLE_USE_CC_CONFIGURE_FROM_RULES_CC, incompatibleUseCcConfigureFromRulesCc)
.setBool(
Expand Down Expand Up @@ -997,6 +1031,10 @@ public StarlarkSemantics toStarlarkSemantics() {
new StarlarkSemantics.Key<>("repositories_without_autoloads", ImmutableList.of());
public static final StarlarkSemantics.Key<List<String>> EXPERIMENTAL_BUILTINS_INJECTION_OVERRIDE =
new StarlarkSemantics.Key<>("experimental_builtins_injection_override", ImmutableList.of());
public static final StarlarkSemantics.Key<Utf8EnforcementMode>
INCOMPATIBLE_ENFORCE_STARLARK_UTF8 =
new StarlarkSemantics.Key<>(
"incompatible_enforce_starlark_utf8", Utf8EnforcementMode.WARNING);
public static final StarlarkSemantics.Key<Long> MAX_COMPUTATION_STEPS =
new StarlarkSemantics.Key<>("max_computation_steps", 0L);
public static final StarlarkSemantics.Key<Integer> NESTED_SET_DEPTH_LIMIT =
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/rules/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/skyframe:repository_mapping_function",
"//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions",
"//src/main/java/com/google/devtools/build/lib/skyframe:sky_value_dirtiness_checker",
"//src/main/java/com/google/devtools/build/lib/skyframe:starlark_util",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.packages.BuildFileContainsErrorsException;
import com.google.devtools.build.lib.packages.NoSuchThingException;
import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions;
import com.google.devtools.build.lib.rules.repository.ResolvedFileValue.ResolvedFileKey;
import com.google.devtools.build.lib.skyframe.PrecomputedValue;
import com.google.devtools.build.lib.skyframe.StarlarkUtil;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.skyframe.SkyFunction;
import com.google.devtools.build.skyframe.SkyFunctionException;
Expand Down Expand Up @@ -72,8 +74,21 @@ public SkyValue compute(SkyKey skyKey, Environment env)
key.getPath().asPath(), key.getPath().asPath().getFileSize());

// parse
StarlarkFile file =
StarlarkFile.parse(ParserInput.fromLatin1(bytes, key.getPath().asPath().toString()));
ParserInput parserInput;
try {
parserInput =
StarlarkUtil.createParserInput(
bytes,
key.getPath().asPath().toString(),
starlarkSemantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8),
env.getListener());
} catch (
@SuppressWarnings(
"UnusedException") // createParserInput() reports its own error message
StarlarkUtil.InvalidUtf8Exception e) {
throw resolvedValueError("Failed to read resolved file " + key.getPath());
}
StarlarkFile file = StarlarkFile.parse(parserInput);
if (!file.ok()) {
Event.replayEventsOn(env.getListener(), file.errors());
throw resolvedValueError("Failed to parse resolved file " + key.getPath());
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/com/google/devtools/build/lib/skyframe/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ java_library(
":skyframe_stats",
":skyvalue_retriever_utils",
":starlark_builtins_value",
":starlark_util",
":state_informing_sky_function_environment",
":target_completion_value",
":target_cycle_reporter",
Expand Down Expand Up @@ -800,11 +801,13 @@ java_library(
":bzl_compile_value",
":precomputed_value",
":starlark_builtins_value",
":starlark_util",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/cmdline",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/packages",
"//src/main/java/com/google/devtools/build/lib/packages:autoload_symbols",
"//src/main/java/com/google/devtools/build/lib/packages/semantics",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/skyframe:skyframe-objects",
"//src/main/java/net/starlark/java/eval",
Expand Down Expand Up @@ -2401,10 +2404,12 @@ java_library(
deps = [
":precomputed_value",
":repo_file_value",
":starlark_util",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/cmdline",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/packages",
"//src/main/java/com/google/devtools/build/lib/packages/semantics",
"//src/main/java/com/google/devtools/build/lib/rules:repository/repository_directory_value",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/skyframe:skyframe-objects",
Expand Down Expand Up @@ -3338,3 +3343,14 @@ java_library(
"//third_party:guava",
],
)

java_library(
name = "starlark_util",
srcs = ["StarlarkUtil.java"],
deps = [
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/packages/semantics",
"//src/main/java/net/starlark/java/syntax",
"//third_party:guava",
],
)
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.packages.AutoloadSymbols;
import com.google.devtools.build.lib.packages.BazelStarlarkEnvironment;
import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.RootedPath;
Expand Down Expand Up @@ -169,7 +170,19 @@ static BzlCompileValue computeInline(
}

// We have all deps. Parse, resolve, and return.
ParserInput input = ParserInput.fromLatin1(bytes, inputName);
ParserInput input;
try {
input =
StarlarkUtil.createParserInput(
bytes,
inputName,
semantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8),
env.getListener());
} catch (
@SuppressWarnings("UnusedException") // createParserInput() reports its own error message
StarlarkUtil.InvalidUtf8Exception e) {
return BzlCompileValue.noFile("compilation of '%s' failed", inputName);
}
FileOptions options =
FileOptions.builder()
// By default, Starlark load statements create file-local bindings.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import com.google.devtools.build.lib.cmdline.RepositoryMapping;
import com.google.devtools.build.lib.cmdline.RepositoryName;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.events.StoredEventHandler;
import com.google.devtools.build.lib.io.FileSymlinkException;
import com.google.devtools.build.lib.io.InconsistentFilesystemException;
import com.google.devtools.build.lib.packages.AutoloadSymbols;
Expand Down Expand Up @@ -1079,7 +1081,8 @@ private LoadedPackage loadPackage(
buildFileRootedPath,
buildFileValue,
starlarkBuiltinsValue,
preludeBindings);
preludeBindings,
env.getListener());
state.compiledBuildFile = compiled;
}

Expand Down Expand Up @@ -1232,7 +1235,8 @@ private CompiledBuildFile compileBuildFile(
RootedPath buildFilePath,
FileValue buildFileValue,
StarlarkBuiltinsValue starlarkBuiltinsValue,
@Nullable Map<String, Object> preludeBindings)
@Nullable Map<String, Object> preludeBindings,
ExtendedEventHandler reporter)
throws PackageFunctionException {
// Though it could be in principle, `cpuBoundSemaphore` is not held here as this method does
// not show up in profiles as being significantly impacted by thrashing. It could be worth doing
Expand Down Expand Up @@ -1266,7 +1270,27 @@ private CompiledBuildFile compileBuildFile(
// If control flow reaches here, we're in territory that is deliberately unsound.
// See the javadoc for ActionOnIOExceptionReadingBuildFile.
}
ParserInput input = ParserInput.fromLatin1(buildFileBytes, inputFile.toString());
StoredEventHandler handler = new StoredEventHandler();
ParserInput input;
try {
input =
StarlarkUtil.createParserInput(
buildFileBytes,
inputFile.toString(),
semantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8),
handler);
} catch (StarlarkUtil.InvalidUtf8Exception e) {
handler.replayOn(reporter);
throw PackageFunctionException.builder()
.setType(PackageFunctionException.Type.BUILD_FILE_CONTAINS_ERRORS)
.setPackageIdentifier(packageId)
.setTransience(Transience.PERSISTENT)
.setException(e)
.setMessage("error reading " + inputFile.toString())
.setPackageLoadingCode(PackageLoading.Code.STARLARK_EVAL_ERROR)
.build();
}
handler.replayOn(reporter);

// Options for processing BUILD files.
FileOptions options =
Expand Down
Loading

0 comments on commit e7934ce

Please sign in to comment.