Skip to content

Commit

Permalink
bazel: rewrite MD5 .note.gnu.build-id with truncated git SHA1.
Browse files Browse the repository at this point in the history
This is a workaround for
bazelbuild/bazel#2805.

Small reorganization of version_generated.cc rules, since we rely on them
here and this can also assist with the Google import of Envoy and its
versioning scheme.

Also added docs on build types and how to do release builds.
  • Loading branch information
htuch committed Apr 14, 2017
1 parent 1875a0d commit e7e4684
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 23 deletions.
18 changes: 0 additions & 18 deletions BUILD
Original file line number Diff line number Diff line change
@@ -1,18 +0,0 @@
package(default_visibility = ["//visibility:public"])

load("//bazel:envoy_build_system.bzl", "envoy_cc_library")

# Generates version_generated.cc by running tools/gen_git_sha.sh, which is
# passed the directory containing the script and the output path ($@).
# The .git contents are listed as srcs, presumably so that the rule re-runs
# when the checked-out commit changes.
genrule(
name = "envoy_version",
srcs = glob([".git/**"]),
outs = ["version_generated.cc"],
# The output is touched first so that it exists before the script runs.
cmd = "touch $@ && $(location tools/gen_git_sha.sh) $$(dirname $(location tools/gen_git_sha.sh)) $@",
local = 1,
tools = ["tools/gen_git_sha.sh"],
)

# Library wrapping the generated version_generated.cc (the output of the
# envoy_version genrule in this package).
envoy_cc_library(
name = "version_generated",
srcs = ["version_generated.cc"],
deps = ["//source/common/common:version_includes"],
)
31 changes: 30 additions & 1 deletion bazel/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,42 @@ tools/bazel-test-gdb //test/common/http:async_client_impl_test

# Additional Envoy build and test options

In general, there are 3 [compilation
modes](https://bazel.build/versions/master/docs/bazel-user-manual.html#flag--compilation_mode)
that Bazel supports:

* `fastbuild`: `-O0 -DDEBUG`, aimed at developer speed (default).
* `opt`: `-O2`, for production builds and performance benchmarking.
* `dbg`: `-O0 -ggdb3 -DDEBUG`, debug symbols.

You can use the `-c <compilation_mode>` flag to control this, e.g.

```
bazel build -c opt //source/exe:envoy-static
```

To build and run tests with the compiler's address sanitizer (ASAN) enabled:

```
bazel test -c dbg --config=asan //test/...
```

The ASAN failure stack traces include numbers as a results of running ASAN with a `dbg` build above.
The ASAN failure stack traces include line numbers as a result of running ASAN with a `dbg` build above.

# Release builds

Release builds should be built in `opt` mode, processed with `strip` and have a
`.note.gnu.build-id` section with the Git SHA1 at which the build took place.
They should also ignore any local `.bazelrc` for reproducibility. This can be
achieved with:

```
bazel --bazelrc=/dev/null build -c opt //source/exe:envoy-static.stripped.stamped
```

One caveat to note is that the Git SHA1 is truncated to 16 bytes today as a
result of the workaround in place for
https://github.com/bazelbuild/bazel/issues/2805.

# Adding or maintaining Envoy build rules

Expand Down
33 changes: 33 additions & 0 deletions bazel/envoy_build_system.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,34 @@ def envoy_cc_library(name,
alwayslink = 1,
)

def _git_stamped_genrule(name, repository = ""):
    """Declares `<name>_stamped`, which produces a `<name>.stamped` copy of `name`.

    To workaround https://github.com/bazelbuild/bazel/issues/2805, we do binary
    rewriting to replace the linker produced MD5 hash with the
    version_generated.cc git SHA1 hash (truncated).

    Args:
      name: label of the binary target to copy and rewrite.
      repository: optional repository prefix (e.g. "@envoy") prepended to the
        Envoy-internal labels used by this rule, mirroring the `repository`
        argument of envoy_cc_binary. Defaults to "" (the local workspace), so
        existing callers are unaffected.
    """
    rewriter = repository + "//tools:git_sha_rewriter.py"
    version_cc = repository + "//source/version_generated:version_generated.cc"
    native.genrule(
        name = name + "_stamped",
        srcs = [
            name,
            version_cc,
        ],
        outs = [name + ".stamped"],
        # Copy the binary, make the copy writable, then rewrite its build-id
        # in place using the SHA found in version_generated.cc.
        cmd = "cp $(location " + name + ") $@ && " +
              "chmod u+w $@ && " +
              "$(location " + rewriter + ") " +
              "$(location " + version_cc + ") $@",
        tools = [rewriter],
    )

# Envoy C++ binary targets should be specified with this function.
def envoy_cc_binary(name,
srcs = [],
data = [],
visibility = None,
repository = "",
deps = []):
# Implicit .stamped targets to obtain builds with the (truncated) git SHA1.
_git_stamped_genrule(name)
_git_stamped_genrule(name + ".stripped")
native.cc_binary(
name = name,
srcs = srcs,
Expand All @@ -70,9 +91,21 @@ def envoy_cc_binary(name,
linkopts = [
"-pthread",
"-lrt",
# Force MD5 hash in build. This is part of the workaround for
# https://github.com/bazelbuild/bazel/issues/2805. Bazel actually
# does this by itself prior to
# https://github.com/bazelbuild/bazel/commit/724706ba4836c3366fc85b40ed50ccf92f4c3882.
# Ironically, forcing it here so that in future releases we will
# have the same behavior. When everyone is using an updated version
# of Bazel, we can use linkopts to set the git SHA1 directly in the
# --build-id and avoid doing the following.
'-Wl,--build-id=md5',
'-Wl,--hash-style=gnu',
],
linkstatic = 1,
visibility = visibility,
# See above comment on MD5 hash.
stamp = 0,
deps = deps + [
repository + "//source/precompiled:precompiled_includes",
],
Expand Down
2 changes: 1 addition & 1 deletion source/common/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,6 @@ envoy_cc_library(
srcs = ["version.cc"],
deps = [
":version_includes",
"//:version_generated",
"//source/version_generated:version_generated",
],
)
4 changes: 2 additions & 2 deletions source/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ envoy_cc_library(
hdrs = ["options_impl.h"],
external_deps = ["tclap"],
deps = [
"//:version_generated",
"//source/version_generated:version_generated",
"//include/envoy/server:options_interface",
"//source/common/common:macros",
"//source/common/common:version_lib",
Expand All @@ -104,7 +104,7 @@ envoy_cc_library(
":connection_handler_lib",
":test_hooks_lib",
":worker_lib",
"//:version_generated",
"//source/version_generated:version_generated",
"//include/envoy/common:optional",
"//include/envoy/event:dispatcher_interface",
"//include/envoy/event:signal_interface",
Expand Down
2 changes: 1 addition & 1 deletion source/server/http/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ envoy_cc_library(
srcs = ["admin.cc"],
hdrs = ["admin.h"],
deps = [
"//:version_generated",
"//source/version_generated:version_generated",
"//include/envoy/filesystem:filesystem_interface",
"//include/envoy/http:filter_interface",
"//include/envoy/network:listen_socket_interface",
Expand Down
19 changes: 19 additions & 0 deletions source/version_generated/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package(default_visibility = ["//visibility:public"])

load("//bazel:envoy_build_system.bzl", "envoy_cc_library")

# Generates version_generated.cc by running //tools:gen_git_sha.sh, which is
# passed the directory containing the script and the output path ($@).
# NOTE(review): glob() patterns are resolved relative to this package
# (source/version_generated), so ".git/**" likely matches nothing now that the
# rule no longer lives in the workspace root -- confirm that the rule still
# re-runs when the checked-out commit changes.
genrule(
name = "envoy_version",
srcs = glob([".git/**"]),
outs = ["version_generated.cc"],
# The output is touched first so that it exists before the script runs.
cmd = "touch $@ && $(location //tools:gen_git_sha.sh) " +
"$$(dirname $(location //tools:gen_git_sha.sh)) $@",
local = 1,
tools = ["//tools:gen_git_sha.sh"],
)

# Library wrapping the generated version_generated.cc (the output of the
# envoy_version genrule in this package).
envoy_cc_library(
name = "version_generated",
srcs = ["version_generated.cc"],
deps = ["//source/common/common:version_includes"],
)
7 changes: 7 additions & 0 deletions tools/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package(default_visibility = ["//visibility:public"])

# Export the scripts so that rules in other packages can reference them by
# label, e.g. //tools:gen_git_sha.sh and //tools:git_sha_rewriter.py.
exports_files([
"gen_git_sha.sh",
"git_sha_rewriter.py",
])

111 changes: 111 additions & 0 deletions tools/git_sha_rewriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env python

# This tool takes an ELF binary that has been built with
# -Wl,--build-id=md5 -Wl,--hash-style=gnu (as done by Bazel prior to
# https://github.com/bazelbuild/bazel/commit/724706ba4836c3366fc85b40ed50ccf92f4c3882,
# i.e. in versions prior to 0.5), and replaces the MD5 compiler hash with a
# truncated git SHA1 hash found in Envoy's version_generated.cc.
#
# This is useful to folks who want the build commit in the .note.gnu.build-id
# section rather than the compiler hash of inputs. Please note that the hash is
# a 16 byte truncated git SHA1, rather than a complete 20 byte git SHA1.
# This is a workaround to https://github.com/bazelbuild/bazel/issues/2805.

import binascii
import re
import subprocess as sp
import sys

# The bytes of .note.gnu.build-id that are expected to precede the MD5 hash,
# one logical field per row.
EXPECTED_BUILD_ID_NOTE_PREFIX = [
    0x04, 0x00, 0x00, 0x00,  # Length of the note "name": 4 bytes.
    0x10, 0x00, 0x00, 0x00,  # Length of the note "description": 16 bytes.
    0x03, 0x00, 0x00, 0x00,  # The note "type".
    0x47, 0x4e, 0x55, 0x00,  # The name itself: 'G', 'N', 'U', '\0'.
]
# The description is expected to hold an MD5 hash, which is 16 bytes long.
MD5_HASH_LEN = 16
# Total size of the note section: the hash plus the prefix that precedes it.
EXPECTED_BUILD_ID_NOTE_LENGTH = MD5_HASH_LEN + len(EXPECTED_BUILD_ID_NOTE_PREFIX)


class RewriterException(Exception):
  """Raised when an input to the build-id rewrite is not what the tool expects."""


# Extract the git SHA1 hex string from version_generated.cc.
def ExtractGitSha(path):
  """Return the string passed to GIT_SHA("...") in the file at `path`.

  Args:
    path: path to version_generated.cc.

  Returns:
    The run of word characters found inside the first GIT_SHA("...").

  Raises:
    RewriterException: if the file contains no GIT_SHA("...") occurrence.
  """
  with open(path, 'r') as f:
    contents = f.read()
  # Raw string: '\(' and '\w' are regex escapes, not string escapes.
  sr = re.search(r'GIT_SHA\("(\w+)"', contents, flags=re.MULTILINE)
  if not sr:
    raise RewriterException('Bad version_generated.cc: %s' % contents)
  return sr.group(1)


# Scrape the offset of .note.gnu.build-id via readelf from the binary. Also
# verify the note section is what we expect.
def ExtractBuildIdNoteOffset(path):
  """Return the file offset of the .note.gnu.build-id section in `path`.

  Args:
    path: path to the ELF binary to inspect.

  Returns:
    The section's file offset as an integer.

  Raises:
    RewriterException: if readelf cannot be run or fails, if its output does
      not have the expected layout, if the note section does not have the
      expected size, or if the section does not start with
      EXPECTED_BUILD_ID_NOTE_PREFIX.
  """
  try:
    # Pass an argument list (no shell) so that unusual characters in `path`
    # cannot be interpreted by a shell.
    raw_output = sp.check_output(['readelf', '-SW', path])
  except sp.CalledProcessError as e:
    # The captured output lives on the exception; nothing was assigned locally.
    raise RewriterException('%s %s' % (e, e.output))
  except OSError as e:
    raise RewriterException('Unable to run readelf: %s' % e)
  # check_output returns bytes on Python 3; decode so the text regexes apply.
  readelf_output = raw_output.decode('utf-8', 'replace')
  # Sanity check the ordering of fields from readelf.
  if not re.search(r'Name\s+Type\s+Address\s+Off\s+Size\s', readelf_output):
    raise RewriterException('Invalid readelf output: %s' % readelf_output)
  # Captures the Off and Size columns (both hexadecimal) of the note section.
  sr = re.search(r'\.note\.gnu\.build-id\s+NOTE\s+\w+\s+(\w+)\s(\w+)\s',
                 readelf_output)
  if not sr:
    raise RewriterException(
        'Unable to parse .note.gnu.build-id note: %s' % readelf_output)
  raw_note_offset, raw_note_size = sr.groups()
  if int(raw_note_size, 16) != EXPECTED_BUILD_ID_NOTE_LENGTH:
    raise RewriterException(
        'Incorrect .note.gnu.build-id note size: %s' % readelf_output)
  note_offset = int(raw_note_offset, 16)
  with open(path, 'rb') as f:
    f.seek(note_offset)
    # bytearray yields integers on both Python 2 and Python 3.
    note_prefix = list(bytearray(f.read(len(EXPECTED_BUILD_ID_NOTE_PREFIX))))
  if note_prefix != EXPECTED_BUILD_ID_NOTE_PREFIX:
    raise RewriterException(
        'Unexpected .note.gnu.build-id prefix in %s: %s' % (path, note_prefix))
  return note_offset


# Inplace binary rewriting of the 16 byte .note.gnu.build-id description with
# the truncated hash.
def RewriteBinary(path, offset, git5_sha1):
  """Overwrite the build-id description in `path` with a truncated git SHA1.

  Args:
    path: path to the binary to modify in place.
    offset: file offset of the .note.gnu.build-id section.
    git5_sha1: git SHA1 as a hex string; only its first 2 * MD5_HASH_LEN hex
      characters (MD5_HASH_LEN bytes) are written.

  Raises:
    RewriterException: if `git5_sha1` has fewer than 2 * MD5_HASH_LEN
      characters, since writing it would replace only part of the existing
      description.
  """
  truncated_hash = git5_sha1[:2 * MD5_HASH_LEN]
  if len(truncated_hash) != 2 * MD5_HASH_LEN:
    raise RewriterException('Git SHA1 too short to rewrite build-id: %s' %
                            git5_sha1)
  # Function-call form of print works on both Python 2 and Python 3.
  print('Writing %s truncated to %s at offset 0x%x in %s' %
        (git5_sha1, truncated_hash, offset, path))
  # Convert before opening the file so that a malformed hash fails early.
  new_description = binascii.unhexlify(truncated_hash)
  with open(path, 'r+b') as f:
    # The description follows immediately after the fixed note prefix.
    f.seek(offset + len(EXPECTED_BUILD_ID_NOTE_PREFIX))
    f.write(new_description)


# Script entry point: read the git SHA from version_generated.cc (argv[1]) and
# write it into the build-id note of the Envoy binary (argv[2]).
if __name__ == '__main__':
  if len(sys.argv) != 3:
    print('Usage: %s <path to version_generated.cc> <Envoy binary path>' %
          sys.argv[0])
    sys.exit(1)
  version_generated = ExtractGitSha(sys.argv[1])
  envoy_bin_path = sys.argv[2]
  build_id_note_offset = ExtractBuildIdNoteOffset(envoy_bin_path)
  RewriteBinary(envoy_bin_path, build_id_note_offset, version_generated)

0 comments on commit e7e4684

Please sign in to comment.