From 7277ffbfaec7c860ae0f66eb9742f3d3ab6f0eb5 Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Tue, 26 Aug 2025 17:42:26 +0200 Subject: [PATCH 1/3] [Optimizer] Fix reinterpretation of strings in _get_numpy_value Signed-off-by: Christoph Berganski --- onnxscript/optimizer/_constant_folding.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/onnxscript/optimizer/_constant_folding.py b/onnxscript/optimizer/_constant_folding.py index e0b0f59c31..bd6f0c937e 100644 --- a/onnxscript/optimizer/_constant_folding.py +++ b/onnxscript/optimizer/_constant_folding.py @@ -278,9 +278,20 @@ def _get_numpy_value( if size_limit is not None and const_value.size > size_limit: return None try: - # Reinterpret the array with `.view()` because some implementations of - # ir.TensorProtocol (e.g. PyTorch<=2.7) do not use ml_dtypes for bfloat16 etc. - array = const_value.numpy().view(const_value.dtype.numpy()) + # Turn the constant value into a numpy array representation with the + # specifics of this conversion handled by the tensor type (might a + # yield result which needs to be reinterpreted) + array = const_value.numpy() + # Make sure strings are converted to object type first (might be + # some fixed width string representation which .view cannot + # convert, resulting in "TypeError: Cannot change data-type for + # array of references.") + if const_value.dtype == ir.DataType.STRING: + array = array.astype(np.object_) + # Reinterpret the array with `.view()` because some implementations + # of ir.TensorProtocol (e.g. PyTorch<=2.7) do not use ml_dtypes for + # bfloat16 etc. + array = array.view(const_value.dtype.numpy()) except FileNotFoundError: # External data is not available. 
logger.warning( From d7394091476336b87988f440e05f88c76b0bb511 Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Wed, 27 Aug 2025 15:14:37 +0200 Subject: [PATCH 2/3] [Optimizer] Skip calling .view for string type in _get_numpy_value Signed-off-by: Christoph Berganski --- onnxscript/optimizer/_constant_folding.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxscript/optimizer/_constant_folding.py b/onnxscript/optimizer/_constant_folding.py index bd6f0c937e..ff8dd1d325 100644 --- a/onnxscript/optimizer/_constant_folding.py +++ b/onnxscript/optimizer/_constant_folding.py @@ -291,7 +291,8 @@ def _get_numpy_value( # Reinterpret the array with `.view()` because some implementations # of ir.TensorProtocol (e.g. PyTorch<=2.7) do not use ml_dtypes for # bfloat16 etc. - array = array.view(const_value.dtype.numpy()) + else: + array = array.view(const_value.dtype.numpy()) except FileNotFoundError: # External data is not available. logger.warning( From 1ed42ab08f56955369ee051ca6bd0c63121ba656 Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Wed, 27 Aug 2025 20:46:05 +0200 Subject: [PATCH 3/3] [Optimizer] Do not convert strings to object in _get_numpy_value Signed-off-by: Christoph Berganski --- onnxscript/optimizer/_constant_folding.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/onnxscript/optimizer/_constant_folding.py b/onnxscript/optimizer/_constant_folding.py index ff8dd1d325..6f11ae7ec9 100644 --- a/onnxscript/optimizer/_constant_folding.py +++ b/onnxscript/optimizer/_constant_folding.py @@ -279,19 +279,16 @@ def _get_numpy_value( return None try: # Turn the constant value into a numpy array representation with the - # specifics of this conversion handled by the tensor type (might a - # yield result which needs to be reinterpreted) + # specifics of this conversion handled by the tensor type array = const_value.numpy() - # Make sure strings are converted to object type first (might be - # 
some fixed width string representation which .view cannot - # convert, resulting in "TypeError: Cannot change data-type for - # array of references.") - if const_value.dtype == ir.DataType.STRING: - array = array.astype(np.object_) - # Reinterpret the array with `.view()` because some implementations - # of ir.TensorProtocol (e.g. PyTorch<=2.7) do not use ml_dtypes for - # bfloat16 etc. - else: + # Strings must not be reinterpreted via .view: doing so raises + # "TypeError: Cannot change data-type for array of references." + # There is also no reason to reinterpret strings; this is only + # relevant for some arithmetic types. + if const_value.dtype != ir.DataType.STRING: + # Reinterpret the array with `.view()` because some + # implementations of ir.TensorProtocol (e.g. PyTorch<=2.7) do + # not use ml_dtypes for bfloat16 etc. array = array.view(const_value.dtype.numpy()) except FileNotFoundError: # External data is not available.