Skip to content

Commit

Permalink
compose: Hash all treefile externals and flattened manifest
Browse files Browse the repository at this point in the history
Move hashing to the Rust side so that we can easily hash over the final
set of inputs after parsing. This means that we now hash over all the
externals, like `add-files` references, any `postprocess-script` script,
and `passwd` and `group` files.

The original motivation for this was that hashing over a reserialized
version of the treefile was not deterministic now that treefiles include
hash tables (i.e. `add-commit-metadata`). So I initially included each
individual treefile as part of the hash.

I realized afterwards that just switching to `BTreeMap` fixes this, so
we can keep hashing only the final flattened reserialized treefile so we
ignore comments and whitespace too. But since I already wrote the patch,
and it fixes a real issue today... here we are.
  • Loading branch information
jlebon committed Jul 9, 2019
1 parent 71e585f commit 9648a41
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 35 deletions.
60 changes: 55 additions & 5 deletions rust/src/treefile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ const INCLUDE_MAXDEPTH: u32 = 50;
/// a TreeComposeConfig.
struct TreefileExternals {
postprocess_script: Option<fs::File>,
add_files: collections::HashMap<String, fs::File>,
add_files: collections::BTreeMap<String, fs::File>,
passwd: Option<fs::File>,
group: Option<fs::File>,
}
Expand All @@ -56,6 +56,8 @@ pub struct Treefile {
rojig_spec: Option<CUtf8Buf>,
serialized: CUtf8Buf,
externals: TreefileExternals,
// This is a checksum over *all* the treefile inputs (recursed treefiles + externals).
checksum: CUtf8Buf,
}

// We only use this while parsing
Expand Down Expand Up @@ -239,7 +241,7 @@ fn treefile_parse<P: AsRef<Path>>(
} else {
None
};
let mut add_files: collections::HashMap<String, fs::File> = collections::HashMap::new();
let mut add_files: BTreeMap<String, fs::File> = BTreeMap::new();
if let Some(ref add_file_names) = tf.add_files.as_ref() {
for (name, _) in add_file_names.iter() {
add_files.insert(name.clone(), utils::open_file(filename.with_file_name(name))?);
Expand Down Expand Up @@ -361,9 +363,7 @@ fn treefile_merge_externals(dest: &mut TreefileExternals, src: &mut TreefileExte
}

// add-files is an array and hence has append semantics.
for (k, v) in src.add_files.drain() {
dest.add_files.insert(k, v);
}
dest.add_files.append(&mut src.add_files);

// passwd/group are basic values
if dest.passwd.is_none() {
Expand Down Expand Up @@ -432,6 +432,13 @@ impl Treefile {
(None, None)
};
let serialized = Treefile::serialize_json_string(&parsed.config)?;
// Notice we hash the *reserialization* of the final flattened treefile only so that e.g.
// comments/whitespace/hash table key reorderings don't trigger a respin. We could take
// this further by using a custom `serialize_with` for Vecs where ordering doesn't matter
// (or just sort the Vecs).
let mut hasher = glib::Checksum::new(glib::ChecksumType::Sha256);
hasher.update(serialized.as_bytes());
parsed.externals.hasher_update(&mut hasher)?;
Ok(Box::new(Treefile {
primary_dfd: dfd,
parsed: parsed.config,
Expand All @@ -440,6 +447,7 @@ impl Treefile {
rojig_spec: rojig_spec,
serialized: serialized,
externals: parsed.externals,
checksum: CUtf8Buf::from_string(hasher.get_string().unwrap()),
}))
}

Expand Down Expand Up @@ -516,6 +524,42 @@ for x in *; do mv ${{x}} %{{buildroot}}%{{_prefix}}/lib/ostree-jigdo/%{{name}};
}
}

fn hash_file(hasher: &mut glib::Checksum, mut f: &fs::File) -> Fallible<()> {
let mut reader = io::BufReader::with_capacity(128 * 1024, f);
loop {
// have to scope fill_buf() so we can consume() below
let n = {
let buf = reader.fill_buf()?;
hasher.update(buf);
buf.len()
};
if n == 0 {
break;
}
reader.consume(n);
}
f.seek(io::SeekFrom::Start(0))?;
Ok(())
}

impl TreefileExternals {
fn hasher_update(&self, hasher: &mut glib::Checksum) -> Fallible<()> {
if let Some(ref f) = self.postprocess_script {
hash_file(hasher, f)?;
}
if let Some(ref f) = self.passwd {
hash_file(hasher, f)?;
}
if let Some(ref f) = self.group {
hash_file(hasher, f)?;
}
for f in self.add_files.values() {
hash_file(hasher, f)?;
}
Ok(())
}
}

/// For increased readability in YAML/JSON, we support whitespace in individual
/// array elements.
fn whitespace_split_packages(pkgs: &[String]) -> Vec<String> {
Expand Down Expand Up @@ -1228,6 +1272,12 @@ mod ffi {
.unwrap_or(ptr::null_mut())
}

#[no_mangle]
pub extern "C" fn ror_treefile_get_checksum(tf: *mut Treefile) -> *const libc::c_char {
let tf = ref_from_raw_ptr(tf);
tf.checksum.as_ptr()
}

#[no_mangle]
pub extern "C" fn ror_treefile_free(tf: *mut Treefile) {
if tf.is_null() {
Expand Down
33 changes: 3 additions & 30 deletions src/app/rpmostree-composeutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,36 +58,9 @@ rpmostree_composeutil_checksum (HyGoal goal,
{
g_autoptr(GChecksum) checksum = g_checksum_new (G_CHECKSUM_SHA256);

/* Hash in the raw treefile; this means reordering the input packages
* or adding a comment will cause a recompose, but let's be conservative
* here.
*/
g_checksum_update (checksum, (guint8*)ror_treefile_get_json_string (tf), -1);

if (json_object_has_member (treefile, "add-files"))
{
JsonArray *add_files = json_object_get_array_member (treefile, "add-files");
guint i, len = json_array_get_length (add_files);
for (i = 0; i < len; i++)
{
JsonArray *add_el = json_array_get_array_element (add_files, i);
if (!add_el)
return glnx_throw (error, "Element in add-files is not an array");
const char *src = _rpmostree_jsonutil_array_require_string_element (add_el, 0, error);
if (!src)
return FALSE;

int src_fd = ror_treefile_get_add_file_fd (tf, src);
g_assert_cmpint (src_fd, !=, -1);
g_autoptr(GBytes) bytes = glnx_fd_readall_bytes (src_fd, NULL, FALSE);
gsize len;
const guint8* buf = g_bytes_get_data (bytes, &len);
g_checksum_update (checksum, (const guint8 *) buf, len);
}

}

/* FIXME; we should also hash the post script */
/* Hash in the treefile inputs (this includes all externals like postprocess, add-files,
* etc... and the final flattened treefile -- see treefile.rs for more details). */
g_checksum_update (checksum, (guint8*)ror_treefile_get_checksum (tf), -1);

/* Hash in each package */
if (!rpmostree_dnf_add_checksum_goal (checksum, goal, NULL, error))
Expand Down

0 comments on commit 9648a41

Please sign in to comment.