Skip to content

Commit

Permalink
chore(config): Make config schema output ordered (#17694)
Browse files Browse the repository at this point in the history
Key/value maps in the config schema currently use an `IndexMap`. This
provides for stable ordering of the output between runs. Unfortunately,
due to the use of initializers via `inventory`, that ordering is
dependent on the order in which those initializers are run, which is in
turn dependent on the choices the linker (and link-time optimization)
makes when building Vector. The result is that config schemas can be
very hard to compare between builds due to large blocks of it moving
around in the output file.

This change changes this type to a `BTreeMap`, which is fully ordered
internally. This will result in a consistent ordering of maps, hopefully
producing config schema output that is more consistent.

<!--
**Your PR title must conform to the conventional commit spec!**

  <type>(<scope>)!: <description>

  * `type` = chore, enhancement, feat, fix, docs
  * `!` = OPTIONAL: signals a breaking change
* `scope` = Optional when `type` is "chore" or "docs", available scopes
https://github.com/vectordotdev/vector/blob/master/.github/semantic.yml#L20
  * `description` = short description of the change

Examples:

  * enhancement(file source): Add `sort` option to sort discovered files
  * feat(new source): Initial `statsd` source
  * fix(file source): Fix a bug discovering new files
  * chore(external docs): Clarify `batch_size` option
-->
  • Loading branch information
bruceg authored Jun 15, 2023
1 parent 079d895 commit 9606353
Showing 6 changed files with 9 additions and 17 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion lib/vector-config-common/Cargo.toml
Original file line number Diff line number Diff line change
@@ -7,7 +7,6 @@ license = "MPL-2.0"
[dependencies]
convert_case = { version = "0.6", default-features = false }
darling = { version = "0.13", default-features = false, features = ["suggestions"] }
indexmap = { version = "1.9", default-features = false, features = ["serde"] }
once_cell = { version = "1", default-features = false, features = ["std"] }
proc-macro2 = { version = "1.0", default-features = false }
serde = { version = "1.0", default-features = false, features = ["derive"] }
4 changes: 3 additions & 1 deletion lib/vector-config-common/src/schema/mod.rs
Original file line number Diff line number Diff line change
@@ -8,7 +8,9 @@ pub mod visit;

pub(crate) const DEFINITIONS_PREFIX: &str = "#/definitions/";

pub type Map<K, V> = indexmap::IndexMap<K, V>;
// We have chosen the `BTree*` types here instead of hash tables to provide for a consistent
// ordering of the output elements between runs and changes to the configuration.
pub type Map<K, V> = std::collections::BTreeMap<K, V>;
pub type Set<V> = std::collections::BTreeSet<V>;

pub use self::gen::{SchemaGenerator, SchemaSettings};
12 changes: 2 additions & 10 deletions lib/vector-config/src/schema/visitors/inline_single.rs
Original file line number Diff line number Diff line change
@@ -43,17 +43,11 @@ impl Visitor for InlineSingleUseReferencesVisitor {
occurrence_visitor.visit_root_schema(root);
let occurrence_map = occurrence_visitor.into_occurrences();

let eligible_to_inline = occurrence_map
self.eligible_to_inline = occurrence_map
.into_iter()
// Filter out any schemas which have more than one occurrence, as naturally, we're
// trying to inline single-use schema references. :)
.filter_map(|(def_name, occurrences)| {
if occurrences == 1 {
Some(def_name)
} else {
None
}
})
.filter_map(|(def_name, occurrences)| (occurrences == 1).then_some(def_name))
// However, we'll also filter out some specific schema definitions which are only
// referenced once, specifically: component base types and component types themselves.
//
@@ -72,8 +66,6 @@ impl Visitor for InlineSingleUseReferencesVisitor {
.map(|s| s.as_ref().to_string())
.collect::<HashSet<_>>();

self.eligible_to_inline = eligible_to_inline;

// Now run our own visitor logic, which will use the inline eligibility to determine if a
// schema reference in a being-visited schema should be replaced inline with the original
// referenced schema, in turn removing the schema definition.
4 changes: 2 additions & 2 deletions lib/vector-config/src/schema/visitors/merge.rs
Original file line number Diff line number Diff line change
@@ -95,7 +95,7 @@ impl Mergeable for serde_json::Map<String, Value> {

impl<K, V> Mergeable for Map<K, V>
where
K: std::hash::Hash + Eq + Clone,
K: Clone + Eq + Ord,
V: Clone + Mergeable,
{
fn merge(&mut self, other: &Self) {
@@ -261,7 +261,7 @@ where

fn merge_map<K, V>(destination: &mut Map<K, V>, source: &Map<K, V>)
where
K: std::hash::Hash + Eq + Clone,
K: Clone + Eq + Ord,
V: Clone + Mergeable,
{
destination.merge(source);
4 changes: 2 additions & 2 deletions lib/vector-config/src/schema/visitors/unevaluated.rs
Original file line number Diff line number Diff line change
@@ -361,9 +361,9 @@ fn is_markable_schema(definitions: &Map<String, Schema>, schema: &SchemaObject)
.as_ref()
.and_then(|reference| {
let reference = get_cleaned_schema_reference(reference);
definitions.get_full(reference)
definitions.get_key_value(reference)
})
.and_then(|(_, name, schema)| schema.as_object().map(|schema| (name, schema)))
.and_then(|(name, schema)| schema.as_object().map(|schema| (name, schema)))
.map_or(false, |(name, schema)| {
debug!(
"Following schema reference '{}' for subschema markability.",

0 comments on commit 9606353

Please sign in to comment.