Skip to content

Commit

Permalink
[CIR][CodeGen] Support trailing_zeros for constant string literals (#617)
Browse files Browse the repository at this point in the history

This patch resolves [issue
#248](#248). It is a follow-up
to [#373](#373),
which handled the case of empty strings.

This patch adds handling for non-empty strings that may end in
trailing zeros, such as:
```
char big_string[100000] = "123";
```
That is converted to
```
@big_string = #cir.const_array<"123" : !cir.array<!s8i x 3>, trailing_zeros> : !cir.array<!s8i x 100000>
```
  • Loading branch information
ivanmurashko authored May 24, 2024
1 parent 49609ae commit 8effbcc
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 5 deletions.
19 changes: 15 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,26 @@ class CIRGenBuilderTy : public CIRBaseBuilderTy {
unsigned size = 0) {
unsigned finalSize = size ? size : str.size();

size_t lastNonZeroPos = str.find_last_not_of('\0');
// If the string is full of null bytes, emit a #cir.zero rather than
// a #cir.const_array.
if (str.count('\0') == str.size()) {
if (lastNonZeroPos == llvm::StringRef::npos) {
auto arrayTy = mlir::cir::ArrayType::get(getContext(), eltTy, finalSize);
return getZeroAttr(arrayTy);
}

auto arrayTy = mlir::cir::ArrayType::get(getContext(), eltTy, finalSize);
return getConstArray(mlir::StringAttr::get(str, arrayTy), arrayTy);
// Use the trailing_zeros form only if there is more than one zero
// at the end.
int trailingZerosNum =
finalSize > lastNonZeroPos + 2 ? finalSize - lastNonZeroPos - 1 : 0;
auto truncatedArrayTy = mlir::cir::ArrayType::get(
getContext(), eltTy, finalSize - trailingZerosNum);
auto fullArrayTy =
mlir::cir::ArrayType::get(getContext(), eltTy, finalSize);
return mlir::cir::ConstArrayAttr::get(
getContext(), fullArrayTy,
mlir::StringAttr::get(str.drop_back(trailingZerosNum),
truncatedArrayTy),
trailingZerosNum);
}

mlir::cir::ConstArrayAttr getConstArray(mlir::Attribute attrs,
Expand Down
4 changes: 3 additions & 1 deletion clang/test/CIR/CodeGen/globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

char string[] = "whatnow";
// CHECK: cir.global external @string = #cir.const_array<"whatnow\00" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8>
char big_string[100000] = "123";
// CHECK: cir.global external @big_string = #cir.const_array<"123" : !cir.array<!s8i x 3>, trailing_zeros> : !cir.array<!s8i x 100000>
int sint[] = {123, 456, 789};
// CHECK: cir.global external @sint = #cir.const_array<[#cir.int<123> : !s32i, #cir.int<456> : !s32i, #cir.int<789> : !s32i]> : !cir.array<!s32i x 3>
int filler_sint[4] = {1, 2}; // Ensure missing elements are zero-initialized.
Expand Down Expand Up @@ -41,7 +43,7 @@ struct {
char y[3];
char z[3];
} nestedString = {"1", "", "\0"};
// CHECK: cir.global external @nestedString = #cir.const_struct<{#cir.const_array<"1\00\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3>, #cir.zero : !cir.array<!s8i x 3>, #cir.zero : !cir.array<!s8i x 3>}>
// CHECK: cir.global external @nestedString = #cir.const_struct<{#cir.const_array<"1" : !cir.array<!s8i x 1>, trailing_zeros> : !cir.array<!s8i x 3>, #cir.zero : !cir.array<!s8i x 3>, #cir.zero : !cir.array<!s8i x 3>}>

struct {
char *name;
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CIR/CodeGen/string-literals.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// Test input: three equally sized char arrays, each initialized from a string
// literal shorter than the array -- a one-character string, an empty string,
// and a string holding only an escaped NUL. The checks below expect the first
// member to be emitted as a const_array with trailing_zeros and the other two
// as all-zero arrays.
struct {
char x[10];
char y[10];
char z[10];
} literals = {"1", "", "\00"};

// CIR-LABEL: @literals
// CIR: #cir.const_struct<{
// CIR: #cir.const_array<"1" : !cir.array<!s8i x 1>, trailing_zeros> : !cir.array<!s8i x 10>,
// CIR: #cir.zero : !cir.array<!s8i x 10>,
// CIR: #cir.zero : !cir.array<!s8i x 10>
// CIR: }>

// LLVM-LABEL: @literals
// LLVM: global %struct.anon.1 {
// LLVM: [10 x i8] c"1\00\00\00\00\00\00\00\00\00",
// LLVM: [10 x i8] zeroinitializer,
// LLVM: [10 x i8] zeroinitializer
// LLVM: }

0 comments on commit 8effbcc

Please sign in to comment.