Skip to content

Commit

Permalink
Indexed name codegen improvements (#550)
Browse files Browse the repository at this point in the history
Improved index code generation within the LLVM pipeline.
The following issues were addressed:

Array indices are i64 per LLVM's addressing convention.
This means that if the value is not a constant, an additional
sext instruction must be created.

Bounds check is removed since it requires a certain analysis
on the index value. This can be addressed in a separate PR.

`IndexedName` code generation is separated into 2 functions
The first, `get_array_length()` is responsible for array initialisation,
the second, `get_array_index()`, for indexing. In latter case, we
support the following cases:
```
...
// Indexing with an integer constant
k[0] = ...

// Indexing with an integer expression
k[10 - 10] 

// Indexing with a `Name` AST node that is an integer
// (in our case a FOR loop induction variable or a variable
// with `CodegenVarType` == `Integer`
k[id] = ...      
k[ena_id] = ...
```
Note that the case:
```
// id := loop integer induction variable
k[id + 1] = ... 
```
is not supported for 2 reasons:

On the AST level, as per #545 the expression would
contain a Name and not VarName node that fails the
code generation.

The case only arises in the kernel functions like state_update,
where indexing is "artificially" created with indexing by a Name
only.

fixes #541
  • Loading branch information
georgemitenkov authored and iomaganaris committed Sep 15, 2022
1 parent e1e8eab commit fd2053e
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 64 deletions.
71 changes: 39 additions & 32 deletions src/codegen/llvm/codegen_llvm_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,28 +39,17 @@ static bool is_supported_statement(const ast::Statement& statement) {
statement.is_if_statement() || statement.is_while_statement();
}

bool CodegenLLVMVisitor::check_array_bounds(const ast::IndexedName& node, unsigned index) {
llvm::Type* array_type = lookup(node.get_node_name())->getType()->getPointerElementType();
unsigned length = array_type->getArrayNumElements();
return 0 <= index && index < length;
}

llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, unsigned index) {
llvm::Type* index_type = llvm::Type::getInt32Ty(*context);
llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, llvm::Value* index) {
llvm::Type* index_type = llvm::Type::getInt64Ty(*context);
std::vector<llvm::Value*> indices;
indices.push_back(llvm::ConstantInt::get(index_type, 0));
indices.push_back(llvm::ConstantInt::get(index_type, index));
indices.push_back(index);

return builder.CreateInBoundsGEP(lookup(name), indices);
}

llvm::Value* CodegenLLVMVisitor::codegen_indexed_name(const ast::IndexedName& node) {
unsigned index = get_array_index_or_length(node);

// Check if index is within array bounds.
if (!check_array_bounds(node, index))
throw std::runtime_error("Error: Index is out of bounds");

llvm::Value* index = get_array_index(node);
return create_gep(node.get_node_name(), index);
}

Expand Down Expand Up @@ -96,20 +85,11 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance
if (!member_var_name->get_name()->is_indexed_name())
throw std::runtime_error("Error: " + member_name + " is not an IndexedName!");

// Proceed to creating a GEP instruction to get the pointer to the member's element. While LLVM
// Helper set the indices to be Name nodes, a sanity check is added here. Note that this step
// can be avoided if using `get_array_index_or_length()`. However, it does not support indexing
// with Name/Expression at the moment. \todo: Reuse `get_array_index_or_length()` here.
// Proceed to creating a GEP instruction to get the pointer to the member's element.
auto member_indexed_name = std::dynamic_pointer_cast<ast::IndexedName>(
member_var_name->get_name());
if (!member_indexed_name->get_length()->is_name())
throw std::runtime_error("Error: " + member_name + " has a non-Name index!");
llvm::Value* i64_index = get_array_index(*member_indexed_name);

// Load the index variable that will be used to access the member's element. Since we index a
// pointer variable, we need to extend the 32-bit integer index variable to 64-bit.
llvm::Value* i32_index = builder.CreateLoad(
lookup(member_indexed_name->get_length()->get_node_name()));
llvm::Value* i64_index = builder.CreateSExt(i32_index, llvm::Type::getInt64Ty(*context));

// Create a indices vector for GEP to return the pointer to the element at the specified index.
std::vector<llvm::Value*> member_indices;
Expand All @@ -135,17 +115,44 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance
return builder.CreateInBoundsGEP(instance_member, member_indices);
}

unsigned CodegenLLVMVisitor::get_array_index_or_length(const ast::IndexedName& indexed_name) {
// \todo: Support indices with expressions and names: k[i + j] = ...
auto integer = std::dynamic_pointer_cast<ast::Integer>(indexed_name.get_length());
llvm::Value* CodegenLLVMVisitor::get_array_index(const ast::IndexedName& node) {
// Process the index expression. It can either be a Name node:
// k[id] // id is an integer
// or an integer expression.
llvm::Value* index_value;
if (node.get_length()->is_name()) {
llvm::Value* ptr = lookup(node.get_length()->get_node_name());
index_value = builder.CreateLoad(ptr);
} else {
node.get_length()->accept(*this);
index_value = values.back();
values.pop_back();
}

// Check if index is a double. While it is possible to use casting from double to integer
// values, we choose not to support these cases.
if (!index_value->getType()->isIntOrIntVectorTy())
throw std::runtime_error("Error: only integer indexing is supported!");

// Conventionally, in LLVM array indices are 64 bit.
auto index_type = llvm::cast<llvm::IntegerType>(index_value->getType());
llvm::Type* i64_type = llvm::Type::getInt64Ty(*context);
if (index_type->getBitWidth() == i64_type->getIntegerBitWidth())
return index_value;

return builder.CreateSExtOrTrunc(index_value, i64_type);
}

int CodegenLLVMVisitor::get_array_length(const ast::IndexedName& node) {
auto integer = std::dynamic_pointer_cast<ast::Integer>(node.get_length());
if (!integer)
throw std::runtime_error("Error: only integer indices/length are supported!");
throw std::runtime_error("Error: only integer length is supported!");

// Check if integer value is taken from a macro.
if (!integer->get_macro())
return integer->get_value();
const auto& macro = sym_tab->lookup(integer->get_macro()->get_node_name());
return static_cast<unsigned>(*macro->get_value());
return static_cast<int>(*macro->get_value());
}

llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& node) {
Expand Down Expand Up @@ -691,7 +698,7 @@ void CodegenLLVMVisitor::visit_codegen_var_list_statement(
llvm::Type* var_type;
if (identifier->is_indexed_name()) {
auto indexed_name = std::dynamic_pointer_cast<ast::IndexedName>(identifier);
unsigned length = get_array_index_or_length(*indexed_name);
int length = get_array_length(*indexed_name);
var_type = llvm::ArrayType::get(scalar_var_type, length);
} else if (identifier->is_name()) {
// This case corresponds to a scalar local variable. Its type is double by default.
Expand Down
21 changes: 11 additions & 10 deletions src/codegen/llvm/codegen_llvm_visitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,6 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor {
, builder(*context)
, fpm(module.get()) {}

/**
* Checks if array index specified by the given IndexedName is within bounds
* \param node IndexedName representing array
* \return \c true if the index is within bounds
*/
bool check_array_bounds(const ast::IndexedName& node, unsigned index);

/**
* Generates LLVM code for the given IndexedName
Expand All @@ -146,14 +140,21 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor {
* \param index element index
* \return GEP instruction value
*/
llvm::Value* create_gep(const std::string& name, unsigned index);
llvm::Value* create_gep(const std::string& name, llvm::Value* index);

/**
* Returns array index from given IndexedName
* \param node IndexedName representing array
* \return array index
*/
llvm::Value* get_array_index(const ast::IndexedName& node);

/**
* Returns array index or length from given IndexedName
* Returns array length from given IndexedName
* \param node IndexedName representing array
* \return array index or length
* \return array length
*/
unsigned get_array_index_or_length(const ast::IndexedName& node);
int get_array_length(const ast::IndexedName& node);

/**
* Returns LLVM type for the given CodegenVarType node
Expand Down
37 changes: 15 additions & 22 deletions test/unit/codegen/codegen_llvm_ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ SCENARIO("Indexed name", "[visitor][llvm]") {
std::string nmodl_text = R"(
PROCEDURE foo() {
LOCAL x[2]
x[10 - 10] = 1
x[1] = 3
}
)";
Expand All @@ -565,14 +566,19 @@ SCENARIO("Indexed name", "[visitor][llvm]") {
std::string module_string = run_llvm_visitor(nmodl_text);
std::smatch m;

// Check GEP is created correctly to pint at array element.
std::regex GEP(
R"(%1 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i32 0, i32 1)");
REQUIRE(std::regex_search(module_string, m, GEP));

// Check the value is stored to the pointer.
std::regex store(R"(store double 3.000000e\+00, double\* %1)");
REQUIRE(std::regex_search(module_string, m, store));
// Check GEPs are created correctly to get the addresses of array elements.
std::regex GEP1(
R"(%1 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 0)");
std::regex GEP2(
R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 1)");
REQUIRE(std::regex_search(module_string, m, GEP1));
REQUIRE(std::regex_search(module_string, m, GEP2));

// Check the value is stored to the correct addresses.
std::regex store1(R"(store double 1.000000e\+00, double\* %1)");
std::regex store2(R"(store double 3.000000e\+00, double\* %2)");
REQUIRE(std::regex_search(module_string, m, store1));
REQUIRE(std::regex_search(module_string, m, store2));
}
}

Expand All @@ -591,7 +597,7 @@ SCENARIO("Indexed name", "[visitor][llvm]") {

// Check GEP is created correctly to pint at array element.
std::regex GEP(
R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i32 0, i32 1)");
R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 1)");
REQUIRE(std::regex_search(module_string, m, GEP));

// Check the value is loaded from the pointer.
Expand All @@ -603,19 +609,6 @@ SCENARIO("Indexed name", "[visitor][llvm]") {
REQUIRE(std::regex_search(module_string, m, store));
}
}

GIVEN("Array with out of bounds access") {
std::string nmodl_text = R"(
PROCEDURE foo() {
LOCAL x[2]
x[5] = 3
}
)";

THEN("error is thrown") {
REQUIRE_THROWS_AS(run_llvm_visitor(nmodl_text), std::runtime_error);
}
}
}

//=============================================================================
Expand Down

0 comments on commit fd2053e

Please sign in to comment.