Revamp test harness for macrobenchmark tests (#4071)

firebase · Sep 20, 2022 · 523730a · 523730a
1 parent 65356d8
commit 523730a
Show file tree

Hide file tree

Showing 53 changed files with 506 additions and 393 deletions.
diff --git a/ci/fireci/fireciplugins/macrobenchmark.py b/ci/fireci/fireciplugins/macrobenchmark.py
@@ -21,6 +21,7 @@
 import re
 import shutil
 import sys
+import tempfile
 import uuid
 
 import click
@@ -31,42 +32,55 @@
 
 from fireci import ci_command
 from fireci import ci_utils
-from fireci.dir_utils import chdir
 from fireci import uploader
+from fireci.dir_utils import chdir
 
 _logger = logging.getLogger('fireci.macrobenchmark')
 
 
+@click.option(
+  '--build-only/--no-build-only',
+  default=False,
+  help='Whether to only build tracing test apps or to also run them on FTL afterwards'
+)
 @ci_command()
-def macrobenchmark():
+def macrobenchmark(build_only):
   """Measures app startup times for Firebase SDKs."""
-  asyncio.run(_launch_macrobenchmark_test())
+  asyncio.run(_launch_macrobenchmark_test(build_only))
 
 
-async def _launch_macrobenchmark_test():
+async def _launch_macrobenchmark_test(build_only):
   _logger.info('Starting macrobenchmark test...')
 
-  artifact_versions, config, _, _ = await asyncio.gather(
-    _parse_artifact_versions(),
-    _parse_config_yaml(),
-    _create_gradle_wrapper(),
-    _copy_google_services(),
-  )
-
+  artifact_versions = await _assemble_all_artifacts()
   _logger.info(f'Artifact versions: {artifact_versions}')
 
-  with chdir('health-metrics/macrobenchmark'):
-    runners = [MacrobenchmarkTest(k, v, artifact_versions) for k, v in config.items()]
-    results = await asyncio.gather(*[x.run() for x in runners], return_exceptions=True)
+  test_dir = await _prepare_test_directory()
+  _logger.info(f'Directory for test apps: {test_dir}')
+
+  config = await _process_config_yaml()
+  _logger.info(f'Processed yaml configurations: {config}')
+
+  tests = [MacrobenchmarkTest(app, artifact_versions, os.getcwd(), test_dir) for app in config['test-apps']]
 
-  await _post_processing(results)
+  _logger.info(f'Building {len(tests)} macrobenchmark test apps...')
+  # TODO(yifany): investigate why it is much slower with asyncio.gather
+  #   - on corp workstations (9 min) than M1 macbook pro (3 min)
+  #   - with gradle 7.5.1 (9 min) than gradle 6.9.2 (5 min)
+  # await asyncio.gather(*[x.build() for x in tests])
+  for test in tests:
+    await test.build()
+
+  if not build_only:
+    _logger.info(f'Submitting {len(tests)} tests to Firebase Test Lab...')
+    results = await asyncio.gather(*[x.test() for x in tests], return_exceptions=True)
+    await _post_processing(results)
 
   _logger.info('Macrobenchmark test finished.')
 
 
-async def _parse_artifact_versions():
-  proc = await asyncio.subprocess.create_subprocess_exec('./gradlew', 'assembleAllForSmokeTests')
-  await proc.wait()
+async def _assemble_all_artifacts():
+  await (await asyncio.create_subprocess_exec('./gradlew', 'assembleAllForSmokeTests')).wait()
 
   with open('build/m2repository/changed-artifacts.json') as json_file:
     artifacts = json.load(json_file)
@@ -78,35 +92,36 @@ def _artifact_key_version(artifact):
   return f'{group_id}:{artifact_id}', version
 
 
-async def _parse_config_yaml():
-  with open('health-metrics/macrobenchmark/config.yaml') as yaml_file:
-    return yaml.safe_load(yaml_file)
+async def _process_config_yaml():
+  with open('health-metrics/benchmark/config.yaml') as yaml_file:
+    config = yaml.safe_load(yaml_file)
+    for app in config['test-apps']:
+      app['plugins'] = app.get('plugins', [])
+      app['traces'] = app.get('traces', [])
+      app['plugins'].extend(config['common-plugins'])
+      app['traces'].extend(config['common-traces'])
+    return config
 
 
-async def _create_gradle_wrapper():
-  with open('health-metrics/macrobenchmark/settings.gradle', 'w'):
-    pass
+async def _prepare_test_directory():
+  test_dir = tempfile.mkdtemp(prefix='benchmark-test-')
 
-  proc = await asyncio.subprocess.create_subprocess_exec(
-    './gradlew',
-    'wrapper',
-    '--gradle-version',
-    '6.9',
-    '--project-dir',
-    'health-metrics/macrobenchmark'
-  )
-  await proc.wait()
+  # Required for creating gradle wrapper, as the dir is not defined in the root settings.gradle
+  open(os.path.join(test_dir, 'settings.gradle'), 'w').close()
 
+  command = ['./gradlew', 'wrapper', '--gradle-version', '7.5.1', '--project-dir', test_dir]
+  await (await asyncio.create_subprocess_exec(*command)).wait()
 
-async def _copy_google_services():
-  if 'FIREBASE_CI' in os.environ:
-    src = os.environ['FIREBASE_GOOGLE_SERVICES_PATH']
-    dst = 'health-metrics/macrobenchmark/template/app/google-services.json'
-    _logger.info(f'Running on CI. Copying "{src}" to "{dst}"...')
-    shutil.copyfile(src, dst)
+  return test_dir
 
 
 async def _post_processing(results):
+  _logger.info(f'Macrobenchmark results: {results}')
+
+  if os.getenv('CI') is None:
+    _logger.info('Running locally. Results upload skipped.')
+    return
+
   # Upload successful measurements to the metric service
   measurements = []
   for result in results:
@@ -130,51 +145,63 @@ class MacrobenchmarkTest:
   """Builds the test based on configurations and runs the test on FTL."""
   def __init__(
       self,
-      sdk_name,
       test_app_config,
       artifact_versions,
+      repo_root_dir,
+      test_dir,
       logger=_logger
   ):
-    self.sdk_name = sdk_name
     self.test_app_config = test_app_config
     self.artifact_versions = artifact_versions
-    self.logger = MacrobenchmarkLoggerAdapter(logger, sdk_name)
-    self.test_app_dir = os.path.join('test-apps', test_app_config['name'])
+    self.repo_root_dir = repo_root_dir
+    self.test_dir = test_dir
+    self.logger = MacrobenchmarkLoggerAdapter(logger, test_app_config['sdk'])
+    self.test_app_dir = os.path.join(test_dir, test_app_config['name'])
     self.test_results_bucket = 'fireescape-benchmark-results'
     self.test_results_dir = str(uuid.uuid4())
     self.gcs_client = storage.Client()
 
-  async def run(self):
-    """Starts the workflow of src creation, apks assembly, FTL testing and results upload."""
+  async def build(self):
+    """Creates test app project and assembles app and test apks."""
     await self._create_benchmark_projects()
     await self._assemble_benchmark_apks()
+
+  async def test(self):
+    """Runs benchmark tests on FTL and fetches FTL results from GCS."""
     await self._execute_benchmark_tests()
     return await self._aggregate_benchmark_results()
 
   async def _create_benchmark_projects(self):
     app_name = self.test_app_config['name']
     self.logger.info(f'Creating test app "{app_name}"...')
 
-    mustache_context = await self._prepare_mustache_context()
+    self.logger.info(f'Copying project template files into "{self.test_app_dir}"...')
+    template_dir = os.path.join(self.repo_root_dir, 'health-metrics/benchmark/template')
+    shutil.copytree(template_dir, self.test_app_dir)
+
+    self.logger.info(f'Copying gradle wrapper binary into "{self.test_app_dir}"...')
+    shutil.copy(os.path.join(self.test_dir, 'gradlew'), self.test_app_dir)
+    shutil.copy(os.path.join(self.test_dir, 'gradlew.bat'), self.test_app_dir)
+    shutil.copytree(os.path.join(self.test_dir, 'gradle'), os.path.join(self.test_app_dir, 'gradle'))
 
-    shutil.copytree('template', self.test_app_dir)
     with chdir(self.test_app_dir):
+      mustache_context = await self._prepare_mustache_context()
       renderer = pystache.Renderer()
       mustaches = glob.glob('**/*.mustache', recursive=True)
       for mustache in mustaches:
+        self.logger.info(f'Processing template file: {mustache}')
         result = renderer.render_path(mustache, mustache_context)
-        original_name = mustache[:-9]  # TODO(yifany): mustache.removesuffix('.mustache')
+        original_name = mustache.removesuffix('.mustache')
         with open(original_name, 'w') as file:
           file.write(result)
 
   async def _assemble_benchmark_apks(self):
-    executable = './gradlew'
-    args = ['assemble', 'assembleAndroidTest', '--project-dir', self.test_app_dir]
-    await self._exec_subprocess(executable, args)
+    with chdir(self.test_app_dir):
+      await self._exec_subprocess('./gradlew', ['assemble'])
 
   async def _execute_benchmark_tests(self):
-    app_apk_path = glob.glob(f'{self.test_app_dir}/app/**/*.apk', recursive=True)[0]
-    test_apk_path = glob.glob(f'{self.test_app_dir}/benchmark/**/*.apk', recursive=True)[0]
+    app_apk_path = glob.glob(f'{self.test_app_dir}/**/app-benchmark.apk', recursive=True)[0]
+    test_apk_path = glob.glob(f'{self.test_app_dir}/**/macrobenchmark-benchmark.apk', recursive=True)[0]
 
     self.logger.info(f'App apk: {app_apk_path}')
     self.logger.info(f'Test apk: {test_apk_path}')
@@ -189,7 +216,7 @@ async def _execute_benchmark_tests(self):
     args += ['--type', 'instrumentation']
     args += ['--app', app_apk_path]
     args += ['--test', test_apk_path]
-    args += ['--device', 'model=redfin,version=30,locale=en,orientation=portrait']
+    args += ['--device', 'model=oriole,version=32,locale=en,orientation=portrait']
     args += ['--directories-to-pull', '/sdcard/Download']
     args += ['--results-bucket', f'gs://{self.test_results_bucket}']
     args += ['--results-dir', self.test_results_dir]
@@ -200,19 +227,13 @@ async def _execute_benchmark_tests(self):
     await self._exec_subprocess(executable, args)
 
   async def _prepare_mustache_context(self):
-    app_name = self.test_app_config['name']
-
     mustache_context = {
-      'plugins': [],
+      'm2repository': os.path.join(self.repo_root_dir, 'build/m2repository'),
+      'plugins': self.test_app_config.get('plugins', []),
+      'traces': self.test_app_config.get('traces', []),
       'dependencies': [],
     }
 
-    if app_name != 'baseline':
-      mustache_context['plugins'].append('com.google.gms.google-services')
-
-    if 'plugins' in self.test_app_config:
-      mustache_context['plugins'].extend(self.test_app_config['plugins'])
-
     if 'dependencies' in self.test_app_config:
       for dep in self.test_app_config['dependencies']:
         if '@' in dep:
@@ -234,9 +255,9 @@ async def _aggregate_benchmark_results(self):
       for benchmark in benchmarks:
         method = benchmark['name']
         clazz = benchmark['className'].split('.')[-1]
-        runs = benchmark['metrics']['startupMs']['runs']
+        runs = benchmark['metrics']['timeToInitialDisplayMs']['runs']
         results.append({
-          'sdk': self.sdk_name,
+          'sdk': self.test_app_config['sdk'],
           'device': device,
           'name': f'{clazz}.{method}',
           'min': min(runs),

diff --git a/health-metrics/README.md b/health-metrics/README.md
@@ -12,4 +12,14 @@ Refer to [README.md](apk-size/README.md) in the subdirectory `apk-size` for more
 
 ## App startup time
 
-**TODO(yifany)**: Add more details once the measurement tools and infrastructure is ready.
+Firebase runs during different
+[app lifecycle](https://d.android.com/guide/components/activities/process-lifecycle)
+phases, and contributes to the overall
+[app startup time](https://d.android.com/topic/performance/vitals/launch-time)
+in many ways.
+
+We are currently using
+[benchmarking](https://d.android.com/topic/performance/benchmarking/benchmarking-overview)
+and [tracing](https://d.android.com/topic/performance/tracing) to measure its
+latency impact. Refer to [README.md](benchmark/README.md) in the subdirectory
+`benchmark` for more details.
diff --git a/health-metrics/benchmark/README.md b/health-metrics/benchmark/README.md
@@ -0,0 +1,112 @@
+# Benchmark
+
+This directory contains the benchmark test apps used for measuring latency for
+initializing Firebase Android SDKs during app startup.
+
+## Test app configurations
+
+[config.yaml](config.yaml) contains a list of configuration blocks for
+building a macrobenchmark test app for each of the Firebase Android SDKs.
+If not all of them are required, comment out irrelevant ones for faster build
+and test time.
+
+## Run benchmark tests
+
+### Prerequisite
+
+1. `fireci` CLI tool
+
+   Refer to its [readme](../../ci/fireci/README.md) for how to install it.
+
+1. `google-services.json`
+
+   Download it from the Firebase project
+   [`fireescape-integ-tests`](https://firebase.corp.google.com/u/0/project/fireescape-integ-tests)
+   to the directory `./template/app`.
+
+1. Authentication to Google Cloud
+
+   Authentication is required by Google Cloud SDK and Google Cloud Storage
+   client library used in the benchmark tests.
+
+   One simple way to configure it is to set the environment variable
+   `GOOGLE_APPLICATION_CREDENTIALS` to a service account key file. However,
+   please refer to the official Google Cloud
+   [doc](https://cloud.google.com/docs/authentication) for full guidance on
+   authentication.
+
+### Run benchmark tests locally
+
+1. Build all test apps by running the command below in the root
+   directory `firebase-android-sdk`:
+
+   ```shell
+   fireci macrobenchmark --build-only
+   ```
+
+1. [Connect an Android device to the computer](https://d.android.com/studio/run/device)
+
+1. Locate the temporary test apps directory from the log, for example:
+
+   - on linux: `/tmp/benchmark-test-*/`
+   - on macos: `/var/folders/**/benchmark-test-*/`
+
+1. Start the benchmark tests from CLI or Android Studio:
+
+   - CLI
+
+     Run the command below in the test app project directory located above:
+
+     ```
+     ./gradlew :macrobenchmark:connectedCheck
+     ```
+
+   - Android Studio
+
+     1. Import the project (e.g. `**/benchmark-test-*/firestore`) into Android Studio
+     1. Start the benchmark test by clicking gutter icons in the file `BenchmarkTest.kt`
+
+1. Inspect the benchmark test results:
+
+   - CLI
+
+     Result files are created in `<test-app-dir>/macrobenchmark/build/outputs/`:
+
+     - `*-benchmarkData.json` contains metric aggregates
+     - `*.perfetto-trace` are the raw trace files
+
+     Additionally, upload `.perfetto-trace` files to
+     [Perfetto Trace Viewer](https://ui.perfetto.dev/) to visualize all traces.
+
+   - Android Studio
+
+     Test results are displayed directly in the "Run" tool window, including
+
+     - macrobenchmark built-in metrics
+     - duration of custom traces
+     - links to trace files that can be visualized within the IDE
+
+     Alternatively, the same set of result files is produced at the same output
+     location as when invoking tests from the CLI, and can be used for inspection.
+
+### Run benchmark tests on Firebase Test Lab
+
+Build and run all tests on FTL by running the command below in the root
+directory `firebase-android-sdk`:
+
+```
+fireci macrobenchmark
+```
+
+Alternatively, it is possible to build all test apps via the steps described in
+[Run benchmark tests locally](#run-benchmark-tests-locally)
+and manually
+[run tests on FTL with the `gcloud` CLI](https://firebase.google.com/docs/test-lab/android/command-line#running_your_instrumentation_tests).
+
+Aggregated benchmark results are displayed in the log. The log also
+contains links to FTL result pages and result files on Google Cloud Storage.
+
+## Toolchains
+
+- Gradle 7.5.1
+- Android Gradle Plugin 7.2.2