Skip to content

Commit af3f5e8

Browse files
NuriAmarifacebook-github-bot
authored andcommitted
Support "virtual archives"
Summary: Currently Darwin distributed thin-lto builds produce a static archive from every apple_library target used as input to a link with `archive` actions, then unarchive them with `thin_lto_prepare` actions, passing the object files directly to the link. Creating static archives only to unarchive them immediately is wasteful, we can save RE time, cache capacity and build time by just not creating the archives in the first place. cxx_library rules (which apple_library is a wrapper on top of) produce either an [ObjectsLinkable](https://www.internalfb.com/code/fbsource/[1c6e9dd34c19f8d85fad642c437287dd873cb12e]/fbcode/buck2/prelude/linking/link_info.bzl?lines=107) or an [ArchiveLinkable](https://www.internalfb.com/code/fbsource/[1c6e9dd34c19f8d85fad642c437287dd873cb12e]/fbcode/buck2/prelude/linking/link_info.bzl?lines=87) linkable depending on configuration. Darwin links consume these linkables according to their traditional semantics (objects linkables eagerly, archive linkables lazily). Until recently I thought GNU distributed thin-lto builds also suffer from this problem. But that's not true, GNU builds solve this archive unarchive problem by [always using lazy semantics unless the target is force loaded](https://www.internalfb.com/code/fbsource/[cab5aa83caca0cf7628f27945fc498439d2d5ebb]/fbcode/buck2/prelude/linking/link_info.bzl?lines=280), even for ObjectsLinkables, and [only producing ObjectsLinkables from cxx_library rules](https://www.internalfb.com/code/fbsource/[1c6e9dd34c19f8d85fad642c437287dd873cb12e]/fbcode/tools/build/buck/gen_modes.py?lines=1281) by setting `requires_objects` on their toolchain. This means that their linker rule logic never consumes `ArchiveLinkables` unless unavoidable (ex: the Rust **compiler** always produces static archives for some reason). I would rather avoid this approach because it will be change in behavior for us, and it is unexpected imho. If the user sets `requires_objects` on their toolchain, and we turn around and pass every target to the linker as if they'd set `requires_archives` that seems a bit surprising to me. In this change, we respect the semantics of the two linkables, but we add the ability to consume `ArchiveLinkables` more intelligently, operating on the object files directly and not creating the archive. For now, we gate the change behind a config so we can measure improvements. Reviewed By: rmaz Differential Revision: D70101572 fbshipit-source-id: c793b1b3b7f809751d406a630d9393f8c4ff672e
1 parent 33a7e7e commit af3f5e8

File tree

2 files changed

+108
-52
lines changed

2 files changed

+108
-52
lines changed

cxx/dist_lto/darwin/dist_lto.bzl

+71-17
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ load("@prelude//utils:argfile.bzl", "at_argfile")
4141
load("@prelude//utils:lazy.bzl", "lazy")
4242
load(":thin_link_record_defs.bzl", "ArchiveMemberOptimizationPlan", "ArchiveOptimizationPlan", "BitcodeMergeState", "ObjectFileOptimizationPlan")
4343

44-
_BitcodeLinkData = record(
44+
_EagerBitcodeLinkData = record(
4545
name = str,
4646
initial_object = Artifact,
4747
bc_file = Artifact,
@@ -50,6 +50,17 @@ _BitcodeLinkData = record(
5050
merged_bc = field([Artifact, None]),
5151
)
5252

53+
_LazyBitcodeLinkData = record(
54+
name = str,
55+
initial_object = Artifact,
56+
bc_file = Artifact,
57+
plan = Artifact,
58+
opt_object = Artifact,
59+
merged_bc = field([Artifact, None]),
60+
archive_start = bool,
61+
archive_end = bool,
62+
)
63+
5364
_ArchiveLinkData = record(
5465
name = str,
5566
manifest = Artifact,
@@ -68,14 +79,15 @@ _DynamicLibraryLinkData = record(
6879
)
6980

7081
_DataType = enum(
71-
"bitcode",
82+
"eager_bitcode",
83+
"lazy_bitcode",
7284
"archive",
7385
"dynamic_library",
7486
)
7587

7688
_IndexLinkData = record(
7789
data_type = _DataType,
78-
link_data = field([_BitcodeLinkData, _ArchiveLinkData, _DynamicLibraryLinkData]),
90+
link_data = field([_LazyBitcodeLinkData, _EagerBitcodeLinkData, _ArchiveLinkData, _DynamicLibraryLinkData]),
7991
)
8092

8193
def cxx_darwin_dist_link(
@@ -198,8 +210,8 @@ def cxx_darwin_dist_link(
198210
plan_outputs.append(merged_bc_output.as_output())
199211

200212
data = _IndexLinkData(
201-
data_type = _DataType("bitcode"),
202-
link_data = _BitcodeLinkData(
213+
data_type = _DataType("eager_bitcode"),
214+
link_data = _EagerBitcodeLinkData(
203215
name = name,
204216
initial_object = obj,
205217
bc_file = bc_output,
@@ -211,6 +223,39 @@ def cxx_darwin_dist_link(
211223
unsorted_index_link_data.append(data)
212224
plan_outputs.extend([bc_output.as_output(), plan_output.as_output()])
213225

226+
# Can't load `read_bool` here because it will cause circular load.
227+
# TODO(T217088553): Remove config read below when measurements have been made:
228+
229+
elif isinstance(linkable, ArchiveLinkable) and linkable.archive.external_objects and read_root_config("user", "enable_virtual_archives_for_distributed_thin_lto", "false") in ("True", "true"):
230+
for virtual_archive_index, obj in enumerate(linkable.archive.external_objects):
231+
name = name_for_obj(link_name, obj)
232+
bc_output = ctx.actions.declare_output(name + ".thinlto.bc")
233+
plan_output = ctx.actions.declare_output(name + ".opt.plan")
234+
opt_output = ctx.actions.declare_output(name + ".opt.o")
235+
merged_bc_output = None
236+
if premerger_enabled:
237+
merged_bc_output = ctx.actions.declare_output(name + ".merged.bc")
238+
plan_outputs.append(merged_bc_output.as_output())
239+
240+
# The first member in a non force loaded virtual archive gets --start-lib prended on the command line
241+
archive_start = (not linkable.link_whole) and virtual_archive_index == 0
242+
archive_end = (not linkable.link_whole) and virtual_archive_index == len(linkable.archive.external_objects) - 1
243+
data = _IndexLinkData(
244+
data_type = _DataType("lazy_bitcode"),
245+
link_data = _LazyBitcodeLinkData(
246+
name = name,
247+
initial_object = obj,
248+
bc_file = bc_output,
249+
plan = plan_output,
250+
opt_object = opt_output,
251+
merged_bc = merged_bc_output,
252+
archive_start = archive_start,
253+
archive_end = archive_end,
254+
),
255+
)
256+
unsorted_index_link_data.append(data)
257+
plan_outputs.extend([bc_output.as_output(), plan_output.as_output()])
258+
214259
elif isinstance(linkable, ArchiveLinkable):
215260
# Our implementation of Distributed ThinLTO operates on individual objects, not archives. Since these
216261
# archives might still contain LTO-able bitcode, we first extract the objects within the archive into
@@ -284,8 +329,10 @@ def cxx_darwin_dist_link(
284329
object_files = []
285330
dynamic_libraries = []
286331
for link_data in input_list:
287-
if link_data.data_type == _DataType("bitcode"):
332+
if link_data.data_type == _DataType("eager_bitcode"):
288333
object_files.append(link_data)
334+
elif link_data.data_type == _DataType("lazy_bitcode"):
335+
regular_archives.append(link_data)
289336
elif link_data.data_type == _DataType("archive"):
290337
if link_data.link_data.link_whole:
291338
force_loaded_archives.append(link_data)
@@ -310,19 +357,26 @@ def cxx_darwin_dist_link(
310357
for idx, artifact in enumerate(sorted_index_link_data):
311358
link_data = artifact.link_data
312359

313-
if artifact.data_type == _DataType("bitcode"):
360+
if artifact.data_type == _DataType("eager_bitcode") or artifact.data_type == _DataType("lazy_bitcode"):
361+
if artifact.data_type == _DataType("lazy_bitcode") and link_data.archive_start:
362+
index_args_out.add("-Wl,--start-lib")
363+
314364
index_args_out.add(link_data.initial_object)
315-
eager_object_file_record = {
365+
366+
if artifact.data_type == _DataType("lazy_bitcode") and link_data.archive_end:
367+
index_args_out.add("-Wl,--end-lib")
368+
369+
object_file_record = {
316370
"input_object_file_path": link_data.initial_object,
317371
"output_index_shard_file_path": outputs[link_data.bc_file].as_output(),
318372
"output_plan_file_path": outputs[link_data.plan].as_output(),
319-
"record_type": "EAGER",
373+
"record_type": "OBJECT_FILE",
320374
"starlark_array_index": idx,
321375
}
322376
if premerger_enabled:
323-
eager_object_file_record["output_premerged_bitcode_file_path"] = outputs[link_data.merged_bc].as_output()
377+
object_file_record["output_premerged_bitcode_file_path"] = outputs[link_data.merged_bc].as_output()
324378

325-
index_meta_records_out.append(eager_object_file_record)
379+
index_meta_records_out.append(object_file_record)
326380

327381
elif artifact.data_type == _DataType("archive"):
328382
manifest = artifacts[link_data.manifest].read_json()
@@ -347,8 +401,8 @@ def cxx_darwin_dist_link(
347401
"input_object_file_path": obj,
348402
"output_index_shards_directory_path": outputs[link_data.indexes_dir].as_output(),
349403
"output_plan_file_path": outputs[link_data.plan].as_output(),
350-
"record_type": "LAZY",
351-
"starlark_array_index": idx,
404+
"record_type": "ARCHIVE_MEMBER",
405+
"starlark_array_index": idx, # Each object shares the same index in the stalark array pointing to the archive link data
352406
}
353407
if premerger_enabled:
354408
lazy_object_file_record["output_premerged_bitcode_directory_path"] = outputs[link_data.merged_bc_dir].as_output()
@@ -481,7 +535,7 @@ def cxx_darwin_dist_link(
481535
# loaded by the indexing phase, or is absorbed by another module,
482536
# there is no point optimizing it.
483537
if not optimization_plan.loaded_by_linker or not optimization_plan.is_bitcode or optimization_plan.merge_state == BitcodeMergeState("ABSORBED").value:
484-
ctx.actions.write(outputs[opt_object], "")
538+
ctx.actions.write(outputs[opt_object].as_output(), "")
485539
return
486540

487541
opt_cmd = cmd_args(lto_opt)
@@ -596,7 +650,7 @@ def cxx_darwin_dist_link(
596650

597651
for artifact in sorted_index_link_data:
598652
link_data = artifact.link_data
599-
if artifact.data_type == _DataType("bitcode"):
653+
if artifact.data_type == _DataType("eager_bitcode") or artifact.data_type == _DataType("lazy_bitcode"):
600654
dynamic_optimize(
601655
name = link_data.name,
602656
initial_object = link_data.initial_object,
@@ -627,7 +681,7 @@ def cxx_darwin_dist_link(
627681
for idx, artifact in enumerate(sorted_index_link_data):
628682
if artifact.data_type == _DataType("dynamic_library"):
629683
append_linkable_args(link_args, artifact.link_data.linkable)
630-
elif artifact.data_type == _DataType("bitcode"):
684+
elif artifact.data_type == _DataType("eager_bitcode") or artifact.data_type == _DataType("lazy_bitcode"):
631685
if idx in non_lto_objects:
632686
opt_objects.append(artifact.link_data.initial_object)
633687
else:
@@ -692,7 +746,7 @@ def cxx_darwin_dist_link(
692746
native_object_files_required_for_dsym_creation = []
693747
for artifact in sorted_index_link_data:
694748
# Without reading dynamic output, we cannot know if a given artifact represents bitcode to be included in LTO, or a native object file already (some inputs to a dthin-lto link are still native object files). We just include both the initial object (that may or may not be bitcode) and the produced bitcode file. dsym-util will only ever need one or the other, we just need to matieralize both.
695-
if artifact.data_type == _DataType("bitcode"):
749+
if artifact.data_type == _DataType("eager_bitcode") or artifact.data_type == _DataType("lazy_bitcode"):
696750
native_object_files_required_for_dsym_creation.append(artifact.link_data.initial_object)
697751
native_object_files_required_for_dsym_creation.append(artifact.link_data.opt_object)
698752
elif artifact.data_type == _DataType("archive"):

0 commit comments

Comments
 (0)