Revamp test harness for macrobenchmark tests #4071

Merged: 13 commits, Sep 20, 2022
Changes from 12 commits
154 changes: 89 additions & 65 deletions ci/fireci/fireciplugins/macrobenchmark.py
@@ -21,6 +21,7 @@
import re
import shutil
import sys
import tempfile
import uuid

import click
@@ -31,42 +32,55 @@

from fireci import ci_command
from fireci import ci_utils
from fireci.dir_utils import chdir
from fireci import uploader
from fireci.dir_utils import chdir

_logger = logging.getLogger('fireci.macrobenchmark')


@click.option(
'--build-only/--no-build-only',
default=False,
help='Whether to only build tracing test apps or to also run them on FTL afterwards'
)
@ci_command()
def macrobenchmark():
def macrobenchmark(build_only):
"""Measures app startup times for Firebase SDKs."""
asyncio.run(_launch_macrobenchmark_test())
asyncio.run(_launch_macrobenchmark_test(build_only))


async def _launch_macrobenchmark_test():
async def _launch_macrobenchmark_test(build_only):
_logger.info('Starting macrobenchmark test...')

artifact_versions, config, _, _ = await asyncio.gather(
_parse_artifact_versions(),
_parse_config_yaml(),
_create_gradle_wrapper(),
_copy_google_services(),
)

artifact_versions = await _assemble_all_artifacts()
_logger.info(f'Artifact versions: {artifact_versions}')

with chdir('health-metrics/macrobenchmark'):
runners = [MacrobenchmarkTest(k, v, artifact_versions) for k, v in config.items()]
results = await asyncio.gather(*[x.run() for x in runners], return_exceptions=True)
test_dir = await _prepare_test_directory()
_logger.info(f'Directory for test apps: {test_dir}')

config = await _process_config_yaml()
_logger.info(f'Processed yaml configurations: {config}')

tests = [MacrobenchmarkTest(app, artifact_versions, os.getcwd(), test_dir) for app in config['test-apps']]

await _post_processing(results)
_logger.info(f'Building {len(tests)} macrobenchmark test apps...')
# TODO(yifany): investigate why it is much slower with asyncio.gather
# - on corp workstations (9 min) than M1 macbook pro (3 min)
# - with gradle 7.5.1 (9 min) than gradle 6.9.2 (5 min)
# await asyncio.gather(*[x.build() for x in tests])
for test in tests:
await test.build()

if not build_only:
_logger.info(f'Submitting {len(tests)} tests to Firebase Test Lab...')
results = await asyncio.gather(*[x.test() for x in tests], return_exceptions=True)
await _post_processing(results)

_logger.info('Macrobenchmark test finished.')


async def _parse_artifact_versions():
proc = await asyncio.subprocess.create_subprocess_exec('./gradlew', 'assembleAllForSmokeTests')
await proc.wait()
async def _assemble_all_artifacts():
await (await asyncio.create_subprocess_exec('./gradlew', 'assembleAllForSmokeTests')).wait()

with open('build/m2repository/changed-artifacts.json') as json_file:
artifacts = json.load(json_file)
@@ -78,35 +92,39 @@ def _artifact_key_version(artifact):
return f'{group_id}:{artifact_id}', version


async def _parse_config_yaml():
with open('health-metrics/macrobenchmark/config.yaml') as yaml_file:
return yaml.safe_load(yaml_file)
async def _process_config_yaml():
with open('health-metrics/benchmark/config.yaml') as yaml_file:
config = yaml.safe_load(yaml_file)
for app in config['test-apps']:
app['plugins'] = app.get('plugins', [])
app['traces'] = app.get('traces', [])
app['plugins'].extend(config['common-plugins'])
app['traces'].extend(config['common-traces'])

# Adding an empty android app for baseline comparison
config['test-apps'].insert(0, {'sdk': 'baseline', 'name': 'baseline'})
[Review comment]
Member: Since we are now measuring Firebase traces only, do we still need this baseline to be present?
Contributor (Author): Makes sense. Removed it and the reference to it in the README.

return config


async def _create_gradle_wrapper():
with open('health-metrics/macrobenchmark/settings.gradle', 'w'):
pass
async def _prepare_test_directory():
test_dir = tempfile.mkdtemp(prefix='benchmark-test-')

proc = await asyncio.subprocess.create_subprocess_exec(
'./gradlew',
'wrapper',
'--gradle-version',
'6.9',
'--project-dir',
'health-metrics/macrobenchmark'
)
await proc.wait()
# Required for creating gradle wrapper, as the dir is not defined in the root settings.gradle
open(os.path.join(test_dir, 'settings.gradle'), 'w').close()

command = ['./gradlew', 'wrapper', '--gradle-version', '7.5.1', '--project-dir', test_dir]
await (await asyncio.create_subprocess_exec(*command)).wait()

async def _copy_google_services():
if 'FIREBASE_CI' in os.environ:
src = os.environ['FIREBASE_GOOGLE_SERVICES_PATH']
dst = 'health-metrics/macrobenchmark/template/app/google-services.json'
_logger.info(f'Running on CI. Copying "{src}" to "{dst}"...')
shutil.copyfile(src, dst)
return test_dir


async def _post_processing(results):
_logger.info(f'Macrobenchmark results: {results}')

if os.getenv('CI') is None:
_logger.info('Running locally. Results upload skipped.')
return

# Upload successful measurements to the metric service
measurements = []
for result in results:
@@ -130,51 +148,63 @@ class MacrobenchmarkTest:
"""Builds the test based on configurations and runs the test on FTL."""
def __init__(
self,
sdk_name,
test_app_config,
artifact_versions,
repo_root_dir,
test_dir,
logger=_logger
):
self.sdk_name = sdk_name
self.test_app_config = test_app_config
self.artifact_versions = artifact_versions
self.logger = MacrobenchmarkLoggerAdapter(logger, sdk_name)
self.test_app_dir = os.path.join('test-apps', test_app_config['name'])
self.repo_root_dir = repo_root_dir
self.test_dir = test_dir
self.logger = MacrobenchmarkLoggerAdapter(logger, test_app_config['sdk'])
self.test_app_dir = os.path.join(test_dir, test_app_config['name'])
self.test_results_bucket = 'fireescape-benchmark-results'
self.test_results_dir = str(uuid.uuid4())
self.gcs_client = storage.Client()

async def run(self):
"""Starts the workflow of src creation, apks assembly, FTL testing and results upload."""
async def build(self):
"""Creates test app project and assembles app and test apks."""
await self._create_benchmark_projects()
await self._assemble_benchmark_apks()

async def test(self):
"""Runs benchmark tests on FTL and fetches FTL results from GCS."""
await self._execute_benchmark_tests()
return await self._aggregate_benchmark_results()

async def _create_benchmark_projects(self):
app_name = self.test_app_config['name']
self.logger.info(f'Creating test app "{app_name}"...')

mustache_context = await self._prepare_mustache_context()
self.logger.info(f'Copying project template files into "{self.test_app_dir}"...')
template_dir = os.path.join(self.repo_root_dir, 'health-metrics/benchmark/template')
shutil.copytree(template_dir, self.test_app_dir)

self.logger.info(f'Copying gradle wrapper binary into "{self.test_app_dir}"...')
shutil.copy(os.path.join(self.test_dir, 'gradlew'), self.test_app_dir)
shutil.copy(os.path.join(self.test_dir, 'gradlew.bat'), self.test_app_dir)
shutil.copytree(os.path.join(self.test_dir, 'gradle'), os.path.join(self.test_app_dir, 'gradle'))

shutil.copytree('template', self.test_app_dir)
with chdir(self.test_app_dir):
mustache_context = await self._prepare_mustache_context()
renderer = pystache.Renderer()
mustaches = glob.glob('**/*.mustache', recursive=True)
for mustache in mustaches:
self.logger.info(f'Processing template file: {mustache}')
result = renderer.render_path(mustache, mustache_context)
original_name = mustache[:-9] # TODO(yifany): mustache.removesuffix('.mustache')
original_name = mustache.removesuffix('.mustache')
with open(original_name, 'w') as file:
file.write(result)

async def _assemble_benchmark_apks(self):
executable = './gradlew'
args = ['assemble', 'assembleAndroidTest', '--project-dir', self.test_app_dir]
await self._exec_subprocess(executable, args)
with chdir(self.test_app_dir):
await self._exec_subprocess('./gradlew', ['assemble'])

async def _execute_benchmark_tests(self):
app_apk_path = glob.glob(f'{self.test_app_dir}/app/**/*.apk', recursive=True)[0]
test_apk_path = glob.glob(f'{self.test_app_dir}/benchmark/**/*.apk', recursive=True)[0]
app_apk_path = glob.glob(f'{self.test_app_dir}/**/app-benchmark.apk', recursive=True)[0]
test_apk_path = glob.glob(f'{self.test_app_dir}/**/macrobenchmark-benchmark.apk', recursive=True)[0]

self.logger.info(f'App apk: {app_apk_path}')
self.logger.info(f'Test apk: {test_apk_path}')
@@ -189,7 +219,7 @@ async def _execute_benchmark_tests(self):
args += ['--type', 'instrumentation']
args += ['--app', app_apk_path]
args += ['--test', test_apk_path]
args += ['--device', 'model=redfin,version=30,locale=en,orientation=portrait']
args += ['--device', 'model=oriole,version=32,locale=en,orientation=portrait']
args += ['--directories-to-pull', '/sdcard/Download']
args += ['--results-bucket', f'gs://{self.test_results_bucket}']
args += ['--results-dir', self.test_results_dir]
@@ -200,19 +230,13 @@
await self._exec_subprocess(executable, args)

async def _prepare_mustache_context(self):
app_name = self.test_app_config['name']

mustache_context = {
'plugins': [],
'm2repository': os.path.join(self.repo_root_dir, 'build/m2repository'),
'plugins': self.test_app_config.get('plugins', []),
'traces': self.test_app_config.get('traces', []),
'dependencies': [],
}

if app_name != 'baseline':
mustache_context['plugins'].append('com.google.gms.google-services')

if 'plugins' in self.test_app_config:
mustache_context['plugins'].extend(self.test_app_config['plugins'])

if 'dependencies' in self.test_app_config:
for dep in self.test_app_config['dependencies']:
if '@' in dep:
@@ -234,9 +258,9 @@ async def _aggregate_benchmark_results(self):
for benchmark in benchmarks:
method = benchmark['name']
clazz = benchmark['className'].split('.')[-1]
runs = benchmark['metrics']['startupMs']['runs']
runs = benchmark['metrics']['timeToInitialDisplayMs']['runs']
results.append({
'sdk': self.sdk_name,
'sdk': self.test_app_config['sdk'],
'device': device,
'name': f'{clazz}.{method}',
'min': min(runs),
12 changes: 11 additions & 1 deletion health-metrics/README.md
@@ -12,4 +12,14 @@ Refer to [README.md](apk-size/README.md) in the subdirectory `apk-size` for more

## App startup time

**TODO(yifany)**: Add more details once the measurement tools and infrastructure is ready.
Firebase runs during different
[app lifecycle](https://d.android.com/guide/components/activities/process-lifecycle)
phases, and contributes to the overall
[app startup time](https://d.android.com/topic/performance/vitals/launch-time)
in many ways.

We are currently using
[benchmarking](https://d.android.com/topic/performance/benchmarking/benchmarking-overview)
and [tracing](https://d.android.com/topic/performance/tracing) to measure its
latency impact. Refer to [README.md](benchmark/README.md) in the subdirectory
`benchmark` for more details.
117 changes: 117 additions & 0 deletions health-metrics/benchmark/README.md
@@ -0,0 +1,117 @@
# Benchmark

This directory contains the benchmark test apps used to measure the latency of
initializing Firebase Android SDKs during app startup.

## Test app configurations

[config.yaml](config.yaml) contains a list of configuration blocks for
building a macrobenchmark test app for each of the Firebase Android SDKs.
If not all of them are required, comment out the irrelevant ones to reduce
build and test time.
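
For illustration, the minimal sketch below shows the shape the `fireci` plugin
expects after loading [config.yaml](config.yaml) with `yaml.safe_load`: a
`test-apps` list plus `common-plugins` and `common-traces` entries that are
merged into each app's own `plugins` and `traces`. The SDK, dependency, and
trace values here are made up for the example.

```python
import yaml

# Hypothetical config: the field names (test-apps, sdk, name, dependencies,
# traces, common-plugins, common-traces) follow what macrobenchmark.py reads;
# the values are illustrative only.
example_config = """
common-plugins: [com.google.gms.google-services]
common-traces: [Firebase]
test-apps:
  - sdk: example-sdk
    name: example
    dependencies: [com.google.firebase:firebase-example-ktx]
    traces: [example-trace]
"""

config = yaml.safe_load(example_config)

# Mirrors _process_config_yaml: per-app lists default to empty and are then
# extended with the common entries.
for app in config['test-apps']:
    app['plugins'] = app.get('plugins', []) + config['common-plugins']
    app['traces'] = app.get('traces', []) + config['common-traces']

print(config['test-apps'][0]['plugins'])  # ['com.google.gms.google-services']
print(config['test-apps'][0]['traces'])   # ['example-trace', 'Firebase']
```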

**Note**: An empty Android app (named `baseline`) will be created at runtime
for baseline comparison, even though it is not listed in
[config.yaml](config.yaml). This app can also be used as a skeleton for
setting up a more complex benchmark test app.

## Run benchmark tests

### Prerequisite

1. `fireci` CLI tool

Refer to its [readme](../../ci/fireci/README.md) for how to install it.

1. `google-services.json`

Download it from the Firebase project
[`fireescape-integ-tests`](https://firebase.corp.google.com/u/0/project/fireescape-integ-tests)
to the directory `./template/app`.

1. Authentication to Google Cloud

Authentication is required by the Google Cloud SDK and the Google Cloud Storage
client library used in the benchmark tests.

One simple way to configure it is to set the environment variable
`GOOGLE_APPLICATION_CREDENTIALS` to a service account key file (a minimal
sketch follows this list). However, please refer to the official Google Cloud
[documentation](https://cloud.google.com/docs/authentication) for full
guidance on authentication.
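
For a quick local check, the following is a minimal sketch (not part of the
harness) showing that the Google Cloud Storage client used by the benchmark
tests picks up credentials from `GOOGLE_APPLICATION_CREDENTIALS`; the key file
path is a placeholder.

```python
import os
from google.cloud import storage  # the same client library the fireci plugin uses

# Placeholder path; normally GOOGLE_APPLICATION_CREDENTIALS is exported in the
# shell before running fireci.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', '/path/to/service-account.json')

# The client picks up the credentials automatically from the environment.
client = storage.Client()
print(f'Authenticated for project: {client.project}')
```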

### Run benchmark tests locally

1. Build all test apps by running the command below in the root
directory `firebase-android-sdk`:

```shell
fireci macrobenchmark --build-only
```

1. [Connect an Android device to the computer](https://d.android.com/studio/run/device)

1. Locate the temporary test apps directory from the log, for example:

- on Linux: `/tmp/benchmark-test-*/`
- on macOS: `/var/folders/**/benchmark-test-*/`

1. Start the benchmark tests from CLI or Android Studio:

- CLI

Run the command below in the test app project directory located in the previous step:

```shell
./gradlew :macrobenchmark:connectedCheck
```

- Android Studio

1. Import the project (e.g. `**/benchmark-test-*/firestore`) into Android Studio
1. Start the benchmark test by clicking the gutter icons in the file `BenchmarkTest.kt`

1. Inspect the benchmark test results:

- CLI

Result files are created in `<test-app-dir>/macrobenchmark/build/outputs/`:

- `*-benchmarkData.json` contains metric aggregates
- `*.perfetto-trace` are the raw trace files

Additionally, upload the `.perfetto-trace` files to
[Perfetto Trace Viewer](https://ui.perfetto.dev/) to visualize all traces. A
minimal sketch for summarizing `*-benchmarkData.json` files follows this list.

- Android Studio

Test results are displayed directly in the "Run" tool window, including

- macrobenchmark built-in metrics
- duration of custom traces
- links to trace files that can be visualized within the IDE

Alternatively, the same set of result files is produced at the same output
location as when the tests are invoked from the CLI, and can be used for
inspection.
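
For local inspection from the command line, the following minimal sketch
summarizes a `*-benchmarkData.json` file using the same fields the `fireci`
plugin aggregates (`benchmarks`, `className`, `name`, and the
`timeToInitialDisplayMs` runs); the file path is a placeholder.

```python
import json
import statistics

# Placeholder path; point this at an actual *-benchmarkData.json produced
# under <test-app-dir>/macrobenchmark/build/outputs/.
path = 'macrobenchmark/build/outputs/example-benchmarkData.json'

with open(path) as json_file:
    data = json.load(json_file)

for benchmark in data['benchmarks']:
    clazz = benchmark['className'].split('.')[-1]
    runs = benchmark['metrics']['timeToInitialDisplayMs']['runs']
    print(f'{clazz}.{benchmark["name"]}: '
          f'min={min(runs)} median={statistics.median(runs)} max={max(runs)}')
```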

### Run benchmark tests on Firebase Test Lab

Build and run all tests on FTL by running the command below in the root
directory `firebase-android-sdk`:

```shell
fireci macrobenchmark
```

Alternatively, it is possible to build all test apps via the steps described in
[Run benchmark tests locally](#run-benchmark-tests-locally)
and then manually
[run the tests on FTL with the `gcloud` CLI](https://firebase.google.com/docs/test-lab/android/command-line#running_your_instrumentation_tests).

Aggregated benchmark results are displayed in the log. The log also
contains links to FTL result pages and result files on Google Cloud Storage.

## Toolchains

- Gradle 7.5.1
- Android Gradle Plugin 7.2.2