|
| 1 | +import openvino as ov |
| 2 | +from openvino.runtime import op, opset1, opset8 |
| 3 | +from openvino_devtools.ov2py import ov2py |
| 4 | + |
| 5 | +import numpy as np |
| 6 | + |
| 7 | + |
def build_model(bs=2, seq_len=7, num_head=32, head_dim=128, max_position_embeddings=2048, dtype=ov.Type.f16):
    """Build an OpenVINO model that applies a rotary-embedding style transform.

    The graph transposes the input from ``[bs, seq_len, num_head, head_dim]``
    to ``[bs, num_head, seq_len, head_dim]`` and computes::

        x * cos_cache[:seq_len] + concat(-x[..., half:], x[..., :half]) * sin_cache[:seq_len]

    where ``half = head_dim // 2``.

    The model declares five inputs, in this order: the input tensor,
    ``pos_id_end`` (scalar i64), ``pos_ids`` (``[1, seq_len]`` i64), the cos
    cache and the sin cache (both ``[1, 1, max_position_embeddings, head_dim]``).
    ``pos_id_end`` and ``pos_ids`` are only declared as parameters; no op in
    this graph consumes them.

    Args:
        bs: Batch size of the input tensor.
        seq_len: Sequence length; also the number of cache rows that are used.
        num_head: Number of heads in the input tensor.
        head_dim: Size of the last dimension; must be even.
        max_position_embeddings: Number of rows in the cos/sin caches.
        dtype: Element type of the input, the caches and the ``-1`` constant.

    Returns:
        An ``ov.Model`` named ``'RoPE'`` with a single result.

    Raises:
        ValueError: If ``head_dim`` is odd or ``seq_len`` exceeds
            ``max_position_embeddings``.
    """
    # Fail early with a readable message instead of an opaque shape-inference
    # error: the two halves must have equal width, and the cache slice must
    # really contain seq_len rows for the following reshape to be valid.
    if head_dim % 2 != 0:
        raise ValueError(f"head_dim must be even, got {head_dim}")
    if seq_len > max_position_embeddings:
        raise ValueError(
            f"seq_len ({seq_len}) must not exceed "
            f"max_position_embeddings ({max_position_embeddings})"
        )

    input_shape = [bs, seq_len, num_head, head_dim]
    output_shape = [bs, num_head, seq_len, head_dim]
    cache_shape = [1, 1, max_position_embeddings, head_dim]

    hidden_states = op.Parameter(dtype, ov.Shape(input_shape))
    pos_id_end = op.Parameter(ov.Type.i64, ov.Shape())
    pos_ids = op.Parameter(ov.Type.i64, ov.Shape([1, seq_len]))
    cos_cache = op.Parameter(dtype, ov.Shape(cache_shape))
    sin_cache = op.Parameter(dtype, ov.Shape(cache_shape))

    def scale_by_cache(tensor, cache):
        """Multiply ``tensor`` by the first ``seq_len`` rows of ``cache``."""
        cache = opset8.slice(cache, [0, 0, 0, 0], [1, 1, seq_len, head_dim], [1, 1, 1, 1])
        # NOTE(review): the reshape round-trip below looks redundant but is
        # kept verbatim; presumably the exact op sequence matters to whatever
        # consumes this graph - confirm before collapsing it.
        cache = opset1.reshape(cache, [1, seq_len, head_dim], special_zero=False)
        cache = opset1.reshape(cache, [1, 1, seq_len, head_dim], special_zero=False)
        # NOTE(review): [0, 1] is passed as the third positional argument
        # (axes mapping); it may be unused under the default broadcast mode -
        # confirm against the opset1.broadcast documentation.
        cache = opset1.broadcast(cache, output_shape, [0, 1])
        return opset1.multiply(tensor, cache)

    transposed_input = opset1.transpose(hidden_states, [0, 2, 1, 3])
    apply_cos = scale_by_cache(transposed_input, cos_cache)

    half_head_dim = head_dim // 2
    half_head_dim_shape = [bs, num_head, seq_len, half_head_dim]
    first_half = opset8.slice(transposed_input, [0, 0, 0, 0], half_head_dim_shape, [1, 1, 1, 1])
    second_half = opset8.slice(transposed_input, [0, 0, 0, half_head_dim], output_shape,
                               [1, 1, 1, 1])
    # Negate the second half, then swap the halves along the last axis.
    minus1 = op.Constant(dtype, ov.Shape(half_head_dim_shape), [-1.0])
    second_half = opset1.multiply(second_half, minus1)
    rotated_input = opset1.concat([second_half, first_half], axis=-1)

    apply_sin = scale_by_cache(rotated_input, sin_cache)

    result = opset1.add(apply_cos, apply_sin)
    return ov.Model(result, [hidden_states, pos_id_end, pos_ids, cos_cache, sin_cache], 'RoPE')
| 41 | + |
| 42 | + |
if __name__ == "__main__":
    # Build the model with its default arguments and dump it as Python code.
    model = build_model()
    print(ov2py(model))

    core = ov.Core()
    compiled = core.compile_model(model, "GPU")
    request = compiled.create_infer_request()

    # Constant-filled inputs, listed in the same order as the model inputs.
    input_arrays = (
        np.full((2, 7, 32, 128), 3.0, dtype=np.float16),   # input tensor
        np.array(1, dtype=np.int64),                       # pos_id_end
        np.full((1, 7), 1, dtype=np.int64),                # pos_ids
        np.full((1, 1, 2048, 128), 3.0, dtype=np.float16), # cos cache
        np.full((1, 1, 2048, 128), 2.0, dtype=np.float16), # sin cache
    )
    for port_index, array in enumerate(input_arrays):
        request.set_input_tensor(port_index, ov.Tensor(array))

    request.infer()
    result_tensor = request.get_output_tensor(0)

    print("Output:\n", result_tensor.data)
0 commit comments