From ee350f4c5345f780b8d65d11f8bd24d65c433290 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 30 Oct 2024 00:56:59 +0000 Subject: [PATCH 1/2] Fix use after move to get exegesis annotator working in pipeline This patch fixes a user after move in BHiveToExegesis/related classes where we would create a LLVMState, pass it into a couple other constructors to create things that would store a reference to LLVMState, and then promptly move it to call the BHiveToExegesis constructor. This resulted in use after moves, which were ultimately caught by ubsan. This patch fixes that behavior by encapsulating LLVMState within a unique_ptr to prevent the problems. The exegesis annotator is also enabled in the compile_modules invocation script and test coverage is added now that it works. --- gematria/datasets/bhive_to_exegesis.cc | 8 ++++--- gematria/datasets/bhive_to_exegesis.h | 4 ++-- .../datasets/pipelines/compile_modules.py | 12 +++++++---- .../pipelines/compile_modules_lib_test.py | 21 ++++++++++++------- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/gematria/datasets/bhive_to_exegesis.cc b/gematria/datasets/bhive_to_exegesis.cc index 1237a2e2..958f1e7e 100644 --- a/gematria/datasets/bhive_to_exegesis.cc +++ b/gematria/datasets/bhive_to_exegesis.cc @@ -47,7 +47,7 @@ namespace gematria { BHiveToExegesis::BHiveToExegesis( LlvmArchitectureSupport& ArchitectureSupport, - llvm::exegesis::LLVMState&& LLVMExegesisState, + std::unique_ptr&& LLVMExegesisState, std::unique_ptr&& LLVMExegesisAnnotator) : LLVMAnnotator(std::move(LLVMExegesisAnnotator)), ExegesisState(std::move(LLVMExegesisState)), @@ -60,13 +60,15 @@ Expected> BHiveToExegesis::create( LlvmArchitectureSupport& ArchitectureSupport) { Expected LLVMStateOrErr = LLVMState::Create("", "native"); if (!LLVMStateOrErr) return LLVMStateOrErr.takeError(); + std::unique_ptr LLVMStateOwner( + new LLVMState(std::move(*LLVMStateOrErr))); Expected> AnnotatorOrErr = - ExegesisAnnotator::create(*LLVMStateOrErr); + ExegesisAnnotator::create(*LLVMStateOwner); if (!AnnotatorOrErr) return AnnotatorOrErr.takeError(); return std::unique_ptr( - new BHiveToExegesis(ArchitectureSupport, std::move(*LLVMStateOrErr), + new BHiveToExegesis(ArchitectureSupport, std::move(LLVMStateOwner), std::move(*AnnotatorOrErr))); } diff --git a/gematria/datasets/bhive_to_exegesis.h b/gematria/datasets/bhive_to_exegesis.h index a48e88a9..a3afd4ae 100644 --- a/gematria/datasets/bhive_to_exegesis.h +++ b/gematria/datasets/bhive_to_exegesis.h @@ -49,7 +49,7 @@ class BHiveToExegesis { private: std::unique_ptr LLVMAnnotator; - llvm::exegesis::LLVMState ExegesisState; + std::unique_ptr ExegesisState; gematria::X86Canonicalizer Canonicalizer; gematria::BHiveImporter BHiveImporter; LlvmArchitectureSupport &ArchSupport; @@ -57,7 +57,7 @@ class BHiveToExegesis { BHiveToExegesis( LlvmArchitectureSupport &ArchitectureSupport, - llvm::exegesis::LLVMState &&LLVMExegesisState, + std::unique_ptr &&LLVMExegesisState, std::unique_ptr &&LLVMExegesisAnnotator); absl::StatusOr getAccessedAddrs( diff --git a/gematria/datasets/pipelines/compile_modules.py b/gematria/datasets/pipelines/compile_modules.py index 55645ac6..bd8fc332 100644 --- a/gematria/datasets/pipelines/compile_modules.py +++ b/gematria/datasets/pipelines/compile_modules.py @@ -47,7 +47,10 @@ ) _ANNOTATOR_TYPE = flags.DEFINE_enum( - 'annotator_type', 'fast', ['fast'], 'The type of annotator to use.' + 'annotator_type', + 'fast', + ['fast', 'exegesis'], + 'The type of annotator to use.', ) _MAX_ANNOTATION_ATTEMPTS = flags.DEFINE_integer( @@ -56,9 +59,10 @@ 'The maximum number of times to try annotating a block before giving up', ) -# TODO(boomanaiden154): Currently, only the fast annotator works. Eventually -# this should be fixed so we can use the exegesis annotator too. -ANNOTATOR_MAPPING = {'fast': bhive_to_exegesis.AnnotatorType.fast} +ANNOTATOR_MAPPING = { + 'fast': bhive_to_exegesis.AnnotatorType.fast, + 'exegesis': bhive_to_exegesis.AnnotatorType.exegesis, +} def main(argv) -> None: diff --git a/gematria/datasets/pipelines/compile_modules_lib_test.py b/gematria/datasets/pipelines/compile_modules_lib_test.py index 85d6f8c3..4db4c3ea 100644 --- a/gematria/datasets/pipelines/compile_modules_lib_test.py +++ b/gematria/datasets/pipelines/compile_modules_lib_test.py @@ -16,6 +16,7 @@ import textwrap from absl.testing import absltest +from absl.testing import parameterized import apache_beam as beam from apache_beam.testing import test_pipeline from apache_beam.testing import util as beam_test @@ -27,7 +28,7 @@ from gematria.io.python import tfrecord -class CompileModulesTests(absltest.TestCase): +class CompileModulesTests(parameterized.TestCase): def test_optimize_modules(self): ir_string = textwrap.dedent("""\ @@ -88,10 +89,12 @@ def test_deduplicate_values(self): output = input | compile_modules_lib.DeduplicateValues() beam_test.assert_that(output, beam_test.equal_to(['aa', 'ab', 'bc'])) - def test_annotate_bbs(self): - annotator = compile_modules_lib.AnnotateBBs( - bhive_to_exegesis.AnnotatorType.fast, 50 - ) + @parameterized.parameters([ + bhive_to_exegesis.AnnotatorType.fast, + bhive_to_exegesis.AnnotatorType.exegesis, + ]) + def test_annotate_bbs(self, annotator_type): + annotator = compile_modules_lib.AnnotateBBs(annotator_type, 50) annotator.setup() annotated_blocks = list( @@ -150,7 +153,11 @@ def test_process_and_filter_bbs(self): filtered_bbs = list(process_and_filter_transform.process(bb_hex)) self.assertLen(filtered_bbs, 0) - def test_get_bbs(self): + @parameterized.parameters([ + bhive_to_exegesis.AnnotatorType.fast, + bhive_to_exegesis.AnnotatorType.exegesis, + ]) + def test_get_bbs(self, annotator_type): ir_string1 = textwrap.dedent("""\ define i32 @a() { ret i32 1 @@ -175,7 +182,7 @@ def test_get_bbs(self): test_parquet_file.full_path, output_file_pattern, False, - bhive_to_exegesis.AnnotatorType.fast, + annotator_type, 50, vocab_output_file_pattern, ) From 410b5e33d676ee37d5d585c6557fdb61e99fc49b Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 30 Oct 2024 01:03:28 +0000 Subject: [PATCH 2/2] Add perf counters tag --- gematria/datasets/pipelines/BUILD.bazel | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gematria/datasets/pipelines/BUILD.bazel b/gematria/datasets/pipelines/BUILD.bazel index 3b6fbf38..df51ec31 100644 --- a/gematria/datasets/pipelines/BUILD.bazel +++ b/gematria/datasets/pipelines/BUILD.bazel @@ -40,6 +40,9 @@ gematria_py_test( env = { "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python", }, + tags = [ + "perf_counters", + ], deps = [ ":compile_modules_lib", "//gematria/testing/python:ir_utils",