Commit
Merge branch 'main' of github.com:Accelergy-Project/timeloop-python
Michael Gilbert committed Nov 13, 2024
2 parents d115b1a + 1cde00f commit d9a0655
Showing 9 changed files with 516 additions and 203 deletions.
115 changes: 59 additions & 56 deletions pytimeloop/fastfusion/layerdeduplication.py
@@ -1,5 +1,5 @@
 from collections import defaultdict
-from itertools import permutations
+from itertools import permutations, product
 
 from pytimeloop.looptree.mapping_utilities import get_intermediate_tensors

@@ -26,69 +26,72 @@ def is_equivalent(einsum_id1, einsum_id2, workload, analyzer):
     if einsum2_output_tensor is None:
         einsum2_output_tensor = set()
 
-    einsum1_tensors = einsum1_input_tensors | einsum1_output_tensor
-    einsum2_tensors = einsum2_input_tensors | einsum2_output_tensor
-
     intermediate_tensors = get_intermediate_tensors(workload)
 
-    tensor_properties = defaultdict(set)
-    for tensor in einsum1_input_tensors | einsum2_input_tensors:
-        tensor_properties[tensor].add('input')
-    for tensor in einsum1_output_tensor | einsum2_output_tensor:
-        tensor_properties[tensor].add('input')
-    for tensor in intermediate_tensors:
-        if tensor not in tensor_properties:
-            continue
-        tensor_properties[tensor].add('intermediate')
-    tensor_properties = {
-        tensor: frozenset(properties)
-        for tensor, properties in tensor_properties.items()
-    }
-    property_to_tensors = defaultdict(lambda: (set(), set()))
-    for tensor, property in tensor_properties:
-        tensor_sets = property_to_tensors[property]
-        if tensor in einsum1_tensors:
-            tensor_sets[0].add(tensor)
-        else:
-            tensor_sets[1].add(tensor)
-
-    for tensor_sets in property_to_tensors.values():
-        if len(tensor_sets[0]) != len(tensor_sets[1]):
-            return None, None
-
-    # Check if we can rename einsum1 ranks to create einsum2
-    for renamed_ranks in permutations(einsum2_ranks):
-        rank_renaming = {
-            r1: r2 for r1, r2 in zip(einsum1_ranks, renamed_ranks)
-        }
-        # for tensor_renaming in get_tensor_renamings(property_to_tensors):
-        for renamed_input_tensors in permutations(einsum2_input_tensors):
-            input_tensor_renaming = {
-                t1: t2 for t1, t2
-                in zip(einsum1_input_tensors, renamed_input_tensors)
-            }
-            for renamed_output_tensors in permutations(einsum2_output_tensor):
-                output_tensor_renaming = {
-                    t1: t2 for t1, t2
-                    in zip(einsum1_output_tensor, renamed_output_tensors)
-                }
-                tensor_renaming = input_tensor_renaming | output_tensor_renaming
-
-                if not _shape_is_equivalent(rank_renaming, workload):
-                    continue
-
-                if not _dependency_is_equivalent(einsum_id1,
-                                                 einsum_id2,
-                                                 rank_renaming,
-                                                 tensor_renaming,
-                                                 analyzer):
-                    continue
-
-                return rank_renaming, tensor_renaming
-
-    return None, None
+    all_tensor_properties = []
+    all_tensors = [
+        (einsum1_input_tensors, einsum1_output_tensor),
+        (einsum2_input_tensors, einsum2_output_tensor)
+    ]
+    for input_tensors, output_tensors in all_tensors:
+        tensor_properties = defaultdict(set)
+        for tensor in input_tensors:
+            tensor_properties[tensor].add('input')
+        for tensor in output_tensors:
+            tensor_properties[tensor].add('output')
+        for tensor in tensor_properties:
+            if tensor in intermediate_tensors:
+                tensor_properties[tensor].add('intermediate')
+        tensor_properties = {
+            tensor: frozenset(properties)
+            for tensor, properties in tensor_properties.items()
+        }
+        all_tensor_properties.append(tensor_properties)
+
+    property_to_tensors = defaultdict(lambda: (set(), set()))
+    for i, tensor_properties in enumerate(all_tensor_properties):
+        for tensor, property in tensor_properties.items():
+            tensor_sets = property_to_tensors[property]
+            tensor_sets[i].add(tensor)
+
+    # Check if we can rename tensors in einsum1 to einsum2
+    for tensor_renaming in tensor_renamings(property_to_tensors):
+        # Check if we can rename einsum1 ranks to create einsum2
+        for renamed_ranks in permutations(einsum2_ranks):
+            rank_renaming = {
+                r1: r2 for r1, r2 in zip(einsum1_ranks, renamed_ranks)
+            }
+            if not _shape_is_equivalent(rank_renaming, workload):
+                continue
+
+            if not _dependency_is_equivalent(einsum_id1,
+                                             einsum_id2,
+                                             rank_renaming,
+                                             tensor_renaming,
+                                             analyzer):
+                continue
+
+            return rank_renaming, tensor_renaming
+
+    return None, None
+
+
+def tensor_renamings(property_to_tensors):
+    for tensors_of_1, tensors_of_2 in property_to_tensors.values():
+        if len(tensors_of_1) != len(tensors_of_2):
+            return
+
+    all_tensors_of_1 = [
+        t
+        for tensors_of_1, _ in property_to_tensors.values()
+        for t in tensors_of_1
+    ]
+    permutations_of_tensor_2_by_property = []
+    for _, tensors_of_2 in property_to_tensors.values():
+        permutations_of_tensor_2_by_property.append(permutations(tensors_of_2))
+    for permutation_of_2 in product(*permutations_of_tensor_2_by_property):
+        permutation_of_2 = tuple(t for tupl in permutation_of_2 for t in tupl)
+        renaming = dict(zip(all_tensors_of_1, permutation_of_2))
+        yield renaming
 
 
 def _shape_is_equivalent(rank_renaming, workload):
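This rewrite replaces the brute-force search over all input and output permutations with the new tensor_renamings generator, which pairs only tensors whose property sets (input, output, intermediate) agree. For illustration, a minimal usage sketch with hypothetical tensor names:

from pytimeloop.fastfusion.layerdeduplication import tensor_renamings

# Hypothetical property_to_tensors, shaped the way is_equivalent builds it:
# key = frozenset of properties; value = (einsum1's tensors, einsum2's tensors).
property_to_tensors = {
    frozenset({'input'}): ({'A', 'B'}, {'X', 'Y'}),
    frozenset({'output', 'intermediate'}): ({'C'}, {'Z'}),
}

for renaming in tensor_renamings(property_to_tensors):
    print(renaming)
# Only the two property-preserving bijections are generated
# (set iteration order may vary):
#   {'A': 'X', 'B': 'Y', 'C': 'Z'}
#   {'A': 'Y', 'B': 'X', 'C': 'Z'}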
154 changes: 126 additions & 28 deletions pytimeloop/fastfusion/mapper/mapper2.py
@@ -1,9 +1,12 @@
 from collections import defaultdict
+from copy import deepcopy
+import logging.handlers
 from pathlib import Path
 import logging
 logger = logging.getLogger(__name__)
 
 from ruamel.yaml import YAML
+from joblib import Parallel, delayed
 
 yaml = YAML(typ="safe")

@@ -14,7 +17,8 @@
 from pytimeloop.fastfusion.mapper.constraints import *
 from pytimeloop.fastfusion.layerdeduplication import is_equivalent
 from pytimeloop.fastfusion.mapper.logging import make_queue_and_listener
-from pytimeloop.fastfusion.mapper.per_einsum_mapper import mapper_one_einsum
+from pytimeloop.fastfusion.mapper.per_einsum_mapper import get_top_loop_jobs, mapper_place_fusion_level
+from pytimeloop.fastfusion.sim import Tiling, Loop, TensorStorage
 
 from pytimeloop.timeloopfe.v4 import Ert
 from pytimeloop.timeloopfe.common.backend_calls import call_accelergy_verbose
@@ -47,36 +51,130 @@ def mapper(
     ert = Ert(ert_dict["ERT"])
     energy_dict = ert.to_dict()
 
-    data = {}
-    per_einsum_args = [
-        dict(
-            einsum_id=einsum_id,
-            config=config,
-            pe_array_constraint=pe_array_constraint,
-            mac_array_constraint=mac_array_constraint,
-            explore_glb_uneven=explore_glb_uneven,
-            explore_pe_uneven=explore_pe_uneven,
-            spec=spec,
-            energy_dict=energy_dict,
-            log_queue=log_queue,
-            verbose_stream=verbose_stream,
-        )
-        for einsum_id in einsum_name_to_id.values()
-    ]
-
-    from joblib import Parallel, delayed
-
-    logger.debug("Starting workers")
+    grouped_similar_einsums = convert_rank_to_group_renaming(
+        detect_similar_einsums(workload, analyzer),
+        equivalent_groups
+    )
+    logger.info(f"Found {len(grouped_similar_einsums)} unique Einsums\n"
+                + f"\tConverter: {grouped_similar_einsums}")
+
+    args = get_top_loop_jobs(
+        einsums_to_explore=list(grouped_similar_einsums.keys()),
+        config=config,
+        pe_array_constraint=pe_array_constraint,
+        mac_array_constraint=mac_array_constraint,
+        explore_glb_uneven=explore_glb_uneven,
+        explore_pe_uneven=explore_pe_uneven,
+        spec=spec,
+        energy_dict=energy_dict,
+        log_queue=log_queue,
+        verbose_stream=verbose_stream,
+    )
+
+    print(f'Number of jobs: {len(args)}')
+    n_workers = 128
+    logger.debug(f"Starting {n_workers} workers")
     log_queue_listener.start()
-    data = Parallel(n_jobs=32)(
-        delayed(mapper_one_einsum)(**args) for args in per_einsum_args
+
+    result = Parallel(n_jobs=n_workers)(
+        delayed(mapper_place_fusion_level)(**a) for a in args
     )
+    data = defaultdict(dict)
+    for einsum_id, mappings in result:
+        for k, v in mappings.items():
+            if k in data[einsum_id]:
+                data[einsum_id][k] += v
+            else:
+                data[einsum_id][k] = v
+
     log_queue_listener.stop()
 
-    data = {
-        einsum_id: mapping
-        for einsum_id, mapping in zip(einsum_name_to_id.values(), data)
-    }
+    logger.info(f"Mapper finished for {spec}")
+
+    generated_data = {}
+    logger.info(f"Generating data for non-unique Einsums")
+    for ref_einsum, others in grouped_similar_einsums.items():
+        for other_einsum, (rank_renaming, tensor_renaming) in others.items():
+            logger.info(f"Generating data for {other_einsum}. "
+                        + f"Rank renaming={rank_renaming}. "
+                        + f"Tensor renaming={tensor_renaming}")
+            generated_data[other_einsum] = generate_data(data[ref_einsum],
+                                                         rank_renaming,
+                                                         tensor_renaming)
+
+    for einsum, mapping in generated_data.items():
+        data[einsum] = mapping
+
+    logger.info(f"Final set of Einsums: {set(data.keys())}")
+
+    # data has to come out in sorted Einsum-id order
+    data = {k: v for k, v in sorted(data.items(), key=lambda item: item[0])}
 
     return data
 
 
+def generate_data(data, rank_renaming, tensor_renaming):
+    return {
+        _convert_tiling(tiling, rank_renaming, tensor_renaming)
+        :
+        _convert_stats(stats, rank_renaming, tensor_renaming)
+        for tiling, stats in data.items()
+    }
+
+
+def _convert_tiling(tiling: Tiling, rank_renaming, tensor_renaming):
+    return Tiling(
+        loops=tuple(Loop(rank_renaming[l.rank_id], l.bound, l.is_spatial)
+                    for l in tiling.loops),
+        tensors=frozenset(TensorStorage(tensor_renaming[ts.tensor_id],
+                                        ts.backer_id,
+                                        ts.above_loop_index,
+                                        ts.tile_size)
+                          for ts in tiling.tensors)
+    )
+
+
+def _convert_stats(stats, rank_renaming, tensor_renaming):
+    return deepcopy(stats)
+
+
+def detect_similar_einsums(workload, analyzer, return_all_as_unique=False):
+    if return_all_as_unique:
+        return {ref: {} for ref in workload.einsum_id_to_name()}
+
+    ref_to_other_einsums = {}
+    for einsum in workload.einsum_id_to_name():
+        found = False
+        for ref_einsum in ref_to_other_einsums:
+            rank_renaming, tensor_renaming = is_equivalent(ref_einsum,
+                                                           einsum,
+                                                           workload,
+                                                           analyzer)
+            if rank_renaming is not None:
+                ref_to_other_einsums[ref_einsum][einsum] = (rank_renaming,
+                                                            tensor_renaming)
+                found = True
+                break
+        if not found:
+            ref_to_other_einsums[einsum] = {}
+    return ref_to_other_einsums
+
+
+def convert_rank_to_group_renaming(ref_to_other_einsums, equiv_ranks):
+    return {
+        ref: {
+            other: (_convert_rank_renaming(rank_renaming, equiv_ranks),
+                    tensor_renaming)
+            for other, (rank_renaming, tensor_renaming) in others.items()
+        }
+        for ref, others in ref_to_other_einsums.items()
+    }
+
+
+def _convert_rank_renaming(rank_renaming, equiv_ranks):
+    # The Tiling class uses string ids
+    return {
+        str(equiv_ranks.rank_to_group_id[r1])
+        :
+        str(equiv_ranks.rank_to_group_id[r2])
+        for r1, r2 in rank_renaming.items()
+    }
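Each job produced by get_top_loop_jobs explores one fusion-level placement for one Einsum, so several parallel jobs can return mappings for the same Einsum; mapper() merges them by concatenating per tiling key. A runnable toy sketch of that merge, with hypothetical worker results and assuming list-like values that support +=:

from collections import defaultdict

# Hypothetical worker results: (einsum_id, {tiling_key: mappings}) pairs,
# with einsum 0 covered by two different fusion-level jobs.
result = [
    (0, {'tiling_a': [1], 'tiling_b': [2]}),
    (0, {'tiling_a': [3]}),
    (1, {'tiling_c': [4]}),
]

data = defaultdict(dict)
for einsum_id, mappings in result:
    for k, v in mappings.items():
        if k in data[einsum_id]:
            data[einsum_id][k] += v   # concatenate results for a repeated tiling
        else:
            data[einsum_id][k] = v

print(dict(data))
# {0: {'tiling_a': [1, 3], 'tiling_b': [2]}, 1: {'tiling_c': [4]}}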