Skip to content

Commit

Permalink
[Testing] e2e numerical test with user defined inputs and expected va…
Browse files Browse the repository at this point in the history
…lue (#830)

llvm-cpu `truncf` rounds up when it should round down; this PR allows us
to circumvent this issue by testing with hand-crafted inputs and
expected output. See
#829 (comment)
  • Loading branch information
newling authored Oct 8, 2024
1 parent be42d19 commit 653f441
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 126 deletions.
181 changes: 100 additions & 81 deletions build_tools/ci/cpu_comparison/input_generator.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,3 @@
# This script is expected to be run from the command-line with 3 arguments:
#
# 1) the name of a file to parse.
# 2) the directory where binary files will be written.
# 3) a random seed.
#
# Example:
# ```
# python input_generator.py <input_file> <output_dir> <seed>
# ```
#
# The file <input_file> contains an mlir function, and header information
# about the inputs. See existing tests for examples.
#
# The header information specifies the number, shape, and type of inputs.
# Example:
#
# ```
# # input 3x40xf32
# # input 2x2xi32
# ```
# This script finds all lines of the form above and generates binary files with
# random data for them.
#
# This script also creates a file containing a single line of the form
# `--input="3x40xf32=@<binary_file>" --input="2x2xi32=@<binary_file>"`
#
# which will be used as input to iree-run-module in the main script.


import numpy as np
import struct
import sys
Expand All @@ -36,7 +6,7 @@
from numpy.random import Generator, MT19937, SeedSequence


def convert_f32_to_bf16(float32_array):
def f32_to_bf16(float32_array):
"""
IEEE float32 to bfloat16
Expand All @@ -63,21 +33,21 @@ def convert_f32_to_bf16(float32_array):
return v0.astype(np.uint16)


def convert_bf16_to_f32(bfloat16_array):
def bf16_to_f32(bfloat16_array):
"""
IEEE bfloat16 to float32. See docstring of convert_f32_to_bf16 for a
IEEE bfloat16 to float32. See docstring of f32_to_bf16 for a
bit of info on the mantissa/exponent manipulation.
"""
v0 = bfloat16_array.astype(np.uint32) << 16
return np.frombuffer(v0.tobytes(), dtype=np.float32).reshape(bfloat16_array.shape)


def generate_bfloat16_data(num_values, lower_bound, upper_bound, rng):
def generate_bfloat16_data(nb_values, lower_bound, upper_bound, rng):

float_data = rng.integers(lower_bound, upper_bound, num_values).astype(np.float32)
float_data = rng.integers(lower_bound, upper_bound, nb_values).astype(np.float32)

# Convert float32 data to bfloat16
bf16_data = convert_f32_to_bf16(float_data)
bf16_data = f32_to_bf16(float_data)

# Pack bfloat16 data into binary format
binary_data = struct.pack(f"{len(bf16_data)}H", *bf16_data)
Expand Down Expand Up @@ -153,23 +123,28 @@ def load_input(input_string):
return matrix


def write_input(bin_filename, num_elements, element_type, input_number, input_seed):
def generate_and_write_input(bin_fn, nb_elements, element_type, input_number, seed):
"""
Generate `nb_elements` random values based on the random seed `seed`
and write them to the binary file `bin_fn`. The elements will
be of type `element_type`.
"""

# Random integer values in range [lower_bound, upper_bound)
# will be generated for the input data.
lower_bound = 0
upper_bound = 10

rng = get_generator(input_seed)
rng = get_generator(seed)

data = None
if element_type == "bfloat16" or element_type == "bf16":
data = generate_bfloat16_data(num_elements, lower_bound, upper_bound, rng)
data = generate_bfloat16_data(nb_elements, lower_bound, upper_bound, rng)
else:
dtype = get_numpy_type(element_type)
tensor = rng.integers(lower_bound, upper_bound, num_elements).astype(dtype)
tensor = rng.integers(lower_bound, upper_bound, nb_elements).astype(dtype)
data = tensor.tobytes()

with open(bin_filename, "wb") as file:
with open(bin_fn, "wb") as file:
file.write(data)


Expand All @@ -178,27 +153,25 @@ def get_output_type(filename):
Reads the contents of 'filename' which must contain an MLIR function with
a single returned value, a tensor.
If there's a line of the form '// output 4xf32' then
If there's a line of the form '// output 4xf32' in the read file then
just return the string '4xf32'.
Otherwise find the return op at the end of the function, and get the
type from the tensor type. i.e. get '3xf32' from 'tensor<3xf32>'
Otherwise find the return op that terminates the function, and get the
type from the tensor type. i.e. find a line of the form
'return %foo : tensor<3x4xi32>' and extract and return 3x4xi32.
"""

# First attempt: find line of the form '// output 4xf32'
with open(filename, "r") as file:
# First attempt: find line of the form '// output 4xf32'
# This is fail safe for developers: Just add this line to IR being
# tested.
for line in file:
line = line.strip()
tokens = line.split()
if len(tokens) > 2 and tokens[0] == "//":
if tokens[1] == "output":
return tokens[2].strip()

# Second attempt (for legacy test files)
# Find a line of the form
# 'return %foo : tensor<1x2x3x4xsi32>'
# Second attempt. Find the return operation, and extract the tensor type.
# This won't work if types have aliases.
with open(filename, "r") as file:
for line in file:
if "return " in line:
Expand All @@ -210,17 +183,17 @@ def get_output_type(filename):
return line

raise ValueError(
"Could not find output from the MLIR file. Consider adding a line of the form // output to the file."
"Could not find output type from the MLIR file. Consider adding a line of the form '// output 4xf32' to the file."
)


def np_from_binfile(bin_file, type_str):
def np_from_binfile(bin_fn, type_str):
"""
Load a numpy array from the binary file bin_file.
Load a numpy array from the binary file bin_fn.
Not much interesting here, but the case where element_type_str is 'bf16' is
possibly not obvious: there is no native numpy element type for brainfloat,
so we load it as uint16 and then convert it to float32 (by just packing
The case where element_type_str is 'bf16' is possibly not obvious:
there is no native numpy element type for brainfloat,
so it is loaded as uint16 and then converted to float32 (by packing
extra mantissa 0 bits).
"""

Expand All @@ -236,20 +209,53 @@ def np_from_binfile(bin_file, type_str):
shape = [int(x) for x in type_str.strip().split("x")[0:-1]]

# Load data with the numpy type specified.
array = np.fromfile(bin_file, dtype=np_type)
array = np.fromfile(bin_fn, dtype=np_type)
array = array.reshape(shape)

# If the numpy type was just a proxy, do some extra processing.
if element_type_str == "bf16":
array = convert_bf16_to_f32(array)
array = bf16_to_f32(array)

return array


def generate_inputs(filename, write_dir, seed):
def write_array(bin_fn, nb_elements, element_type, np_array):
    """
    Write the values in `np_array` to the binary file `bin_fn` as raw bytes.

    Args:
        bin_fn: path of the binary file to create (or overwrite).
        nb_elements: the number of elements `np_array` is expected to
            contain. This is verified before anything is written.
        element_type: name of the element type to store. 'bf16' and
            'bfloat16' are both accepted for brainfloat, matching
            `generate_and_write_input`; any other name is resolved with
            `get_numpy_type`.
        np_array: the values to write. Anything `np.asarray` accepts; the
            values are cast to `element_type`, so the incoming dtype does
            not need to match.

    Raises:
        ValueError: if `np_array` does not contain exactly `nb_elements`
            elements.
    """
    # Accept array-likes (lists, tuples) as well as numpy arrays.
    np_array = np.asarray(np_array)

    if nb_elements != np_array.size:
        raise ValueError(
            f"Expected {nb_elements} elements, but got {np_array.size} elements."
        )

    if element_type in ("bf16", "bfloat16"):
        # numpy has no native brainfloat type: cast to float32 first and let
        # f32_to_bf16 produce the 16-bit values that are written to disk.
        array_f32 = np_array.astype(np.float32)
        data = f32_to_bf16(array_f32).tobytes()
    else:
        target_type = get_numpy_type(element_type)
        data = np_array.astype(target_type).tobytes()

    with open(bin_fn, "wb") as file:
        file.write(data)


def generate_inputs(filename, write_dir, seed, preset_inputs={}):
"""
Parse the input file 'filename' and generate binary files for the inputs of
the mlir function.
Parse the MLIR file `filename` and generate and write binary files for the
inputs of the MLIR function. The inputs either contain values generated at
random based on the seed `seed`, or the values are taken from `preset_inputs`.
`preset_inputs` is a map from integer input index (the first index is 1) to a
numpy array.
Example: suppose the MLIR file contains a func.func with 2 arguments,
and `preset_inputs` is {2: np.array([1, 2, 3], dtype=np.int32)}. Then the
first argument to the MLIR function will have random values generated for it,
and the second will have values [1, 2, 3].
"""

name = os.path.splitext(os.path.basename(filename))[0]
Expand All @@ -268,29 +274,42 @@ def generate_inputs(filename, write_dir, seed):
sub_tokens = tokens[2].split("x")
element_type = sub_tokens[-1]

num_elements = 1
nb_elements = 1
for i in range(len(sub_tokens) - 1):
num_elements *= int(sub_tokens[i])
bin_filename = os.path.join(
nb_elements *= int(sub_tokens[i])
bin_fn = os.path.join(
write_dir, name + "_input" + str(input_number) + ".bin"
)
if re.search(r"\s", str(bin_filename)):
if re.search(r"\s", str(bin_fn)):
raise RuntimeError(
f"input {tokens[2]}={bin_filename} has a space in the filename, which is not supported"
f"input {tokens[2]}={bin_fn} has a space in the filename, which is not supported"
)

input_args.append(f"--input={tokens[2]}=@{bin_filename}")
input_args.append(f"--input={tokens[2]}=@{bin_fn}")
# Each input has a distinct seed, based on its input number.
# This is to ensure that operands are not populated with the
# same values.
input_seed = seed + input_number
write_input(
bin_filename,
num_elements,
element_type,
input_number,
input_seed,
)

# Check if input_number is a key in the dictionary. If it is
# write the value in the dictionary. otherwise create a
# random array.
if input_number in preset_inputs:
write_array(
bin_fn,
nb_elements,
element_type,
preset_inputs[input_number],
)
else:
generate_and_write_input(
bin_fn,
nb_elements,
element_type,
input_number,
input_seed,
)

input_number += 1

if (len(tokens) == 2) and tokens[0] == "//input":
Expand All @@ -301,28 +320,28 @@ def generate_inputs(filename, write_dir, seed):

# Try and check that the number of inputs is correct, raise error if
# suspected to be incorrect. This isn't perfect, but hopefully it will
# catch some errors than it detects false positives.
# catch more errors than it detects false positives.

# Find all func.funcs and count their operands:
func_num_inputs = []
func_nb_inputs = []
with open(filename, "r") as file:
all_lines = file.read()
func_func_index = all_lines.find("func.func")
while func_func_index != -1:
open_paren_index = all_lines.find("(", func_func_index)
close_paren_index = all_lines.find(")", open_paren_index)
num_colons = all_lines.count(":", open_paren_index, close_paren_index)
func_num_inputs.append(num_colons)
nb_colons = all_lines.count(":", open_paren_index, close_paren_index)
func_nb_inputs.append(nb_colons)
func_func_index = all_lines.find("func.func", close_paren_index)

# If the number of inputs initially detected doesn't correspond to the
# number of inputs in any of the mlir functions, raise an error.
if len(input_args) not in func_num_inputs:
if len(input_args) not in func_nb_inputs:
raise ValueError(
f"Number of inputs generated does not match the number of inputs in "
f"any of the mlir functions. The number of inputs generated is "
f"{len(input_args)}, the number of inputs in the mlir functions are "
f"{func_num_inputs}"
f"{func_nb_inputs}"
)

return input_args
61 changes: 21 additions & 40 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True, en
)
if raise_on_error and handle.returncode != 0:
raise RuntimeError(
f"Error executing script, error code was {handle.returncode}"
f"Error executing script, error code was {handle.returncode}",
)
return stdout_decode, stderr_decode

Expand Down Expand Up @@ -574,45 +574,6 @@ def aie_vs_llvm_cpu(
)


def aie_vs_np_matmul(
    config,
    test_file,
    use_ukernel=False,
    tile_pipeline="pad-pack",
    lower_to_aie_pipeline="air",
    function_name=None,
    seed=1,
    rtol=1e-6,
    atol=1e-6,
    n_repeats=1,
):
    """
    Compare the AIE result for `test_file` against a reference result.

    Inputs are generated from `test_file` with `generate_inputs` (seeded by
    `seed`), the reference is whatever `matmul_from_input_strings` returns
    for those inputs, and the comparison itself is delegated to
    `aie_vs_baseline`. Nothing is done when `n_repeats` is 0.
    """
    if n_repeats != 0:
        # The returned name is not used here; the call is kept so that the
        # function behaves exactly as before for any filename.
        name_from_mlir_filename(test_file)

        generated_args = generate_inputs(test_file, config.output_dir, seed)
        expected_type = get_output_type(test_file)
        reference = matmul_from_input_strings(generated_args)

        aie_vs_baseline(
            config,
            test_file,
            generated_args,
            reference,
            use_ukernel,
            tile_pipeline,
            lower_to_aie_pipeline,
            function_name,
            seed,
            rtol,
            atol,
            n_repeats,
            expected_type,
        )


class TestSet:
def __init__(self, name):
self.name = name
Expand Down Expand Up @@ -823,6 +784,26 @@ def run(self, config):
lower_to_aie_pipeline="objectFifo",
)

# Test using custom input and output:
ones = np.ones(64 * 64, np.float32).reshape([64, 64])
name = name_from_mlir_filename(test_name)
input_args = generate_inputs(test_name, output_dir, 1, {1: ones, 2: ones})
aie_vs_baseline(
config,
test_name,
input_args,
ones * 64, # expected output
use_ukernel=False,
tile_pipeline="pack-peel",
lower_to_aie_pipeline="objectFifo",
function_name=None,
seed=1,
rtol=0,
atol=0,
n_repeats=1,
output_type=get_output_type(test_name),
)


def get_test_partition():
return [ConvolutionTemplateSet(), ConvolutionSet(), MatmulSet(), SmokeSet()]
Expand Down
Loading

0 comments on commit 653f441

Please sign in to comment.