From acb23f30577c93d12697e1b9115b5e71a035d350 Mon Sep 17 00:00:00 2001 From: Leon Matthes Date: Wed, 17 Jul 2024 16:43:37 +0200 Subject: [PATCH] feat: Skip the AFX_EXT_CLASS macro in C++ parsing We can use TreeSitter's includedRanges feature to only parse the specified ranges. This allows us to exclude certain macros like the AFX_EXT_CLASS macro which was causing issues previously. --- docs/API/knut/cppdocument.md | 1 + src/core/codedocument.cpp | 6 ++ src/core/codedocument.h | 3 + src/core/codedocument_p.cpp | 7 ++- src/core/cppdocument.cpp | 62 +++++++++++++++++++ src/core/cppdocument.h | 2 + src/gui/treesitterinspector.cpp | 1 + src/gui/treesittertreemodel.cpp | 6 +- .../treesitterExcludesMacros/AFX_EXT_CLASS.h | 10 +++ tests/tst_cppdocument_treesitter.cpp | 23 +++++++ 10 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 test_data/tst_cppdocument/treesitterExcludesMacros/AFX_EXT_CLASS.h diff --git a/docs/API/knut/cppdocument.md b/docs/API/knut/cppdocument.md index 268652e3..0fb49c69 100644 --- a/docs/API/knut/cppdocument.md +++ b/docs/API/knut/cppdocument.md @@ -298,6 +298,7 @@ The returned QueryMatch instances contain the following captures: - `declaration`: The full declaration of the method - `function`: The function declaration, without the return type - `name`: The name of the function +- `return-type`: The return type of the function without any reference/pointer specifiers (i.e. `&`/`*`) #### array<[QueryMatch](../knut/querymatch.md)> **queryMethodDefinition**(string scope, string methodName) diff --git a/src/core/codedocument.cpp b/src/core/codedocument.cpp index 8fac2a76..871da29b 100644 --- a/src/core/codedocument.cpp +++ b/src/core/codedocument.cpp @@ -848,4 +848,10 @@ AstNode CodeDocument::astNodeAt(int pos) return {}; } +QList CodeDocument::includedRanges() const +{ + // An empty list tells the parser to include the entire document. 
+ return {}; +} + } // namespace Core diff --git a/src/core/codedocument.h b/src/core/codedocument.h index df1c476d..4a0dc4f6 100644 --- a/src/core/codedocument.h +++ b/src/core/codedocument.h @@ -15,6 +15,7 @@ #include "querymatch.h" #include "symbol.h" #include "textdocument.h" +#include "treesitter/parser.h" #include "treesitter/query.h" #include @@ -79,6 +80,8 @@ class CodeDocument : public TextDocument Q_INVOKABLE Core::AstNode astNodeAt(int pos); + virtual QList includedRanges() const; + public slots: void selectSymbol(const QString &name, int options = NoFindFlags); diff --git a/src/core/codedocument_p.cpp b/src/core/codedocument_p.cpp index ce0212c7..bcfd8f55 100644 --- a/src/core/codedocument_p.cpp +++ b/src/core/codedocument_p.cpp @@ -48,7 +48,12 @@ treesitter::Parser &TreeSitterHelper::parser() std::optional &TreeSitterHelper::syntaxTree() { if (!m_tree) { - m_tree = parser().parseString(m_document->text()); + auto &parser = this->parser(); + if (!parser.setIncludedRanges(m_document->includedRanges())) { + spdlog::warn("TreeSitterHelper::syntaxTree: Unable to set the included ranges on the treesitter parser!"); + parser.setIncludedRanges({}); + } + m_tree = parser.parseString(m_document->text()); if (!m_tree) { spdlog::warn("CodeDocument::syntaxTree: Failed to parse document {}!", m_document->fileName()); } diff --git a/src/core/cppdocument.cpp b/src/core/cppdocument.cpp index 656c9847..9500ac77 100644 --- a/src/core/cppdocument.cpp +++ b/src/core/cppdocument.cpp @@ -692,6 +692,7 @@ MessageMap CppDocument::mfcExtractMessageMap(const QString &className /* = ""*/) * - `declaration`: The full declaration of the method * - `function`: The function declaration, without the return type * - `name`: The name of the function + * - `return-type`: The return type of the function without any reference/pointer specifiers (i.e. 
`&`/`*`) */ Core::QueryMatchList CppDocument::queryMethodDeclaration(const QString &className, const QString &functionName) { @@ -1545,4 +1546,65 @@ QStringList CppDocument::primitiveTypes() const return Utils::cppPrimitiveTypes(); } +QList CppDocument::includedRanges() const +{ + QList macros {"AFX_EXT_CLASS"}; + QList ranges; + treesitter::Point lastPoint {0, 0}; + uint32_t lastByte = 0; + + auto document = textEdit()->document(); + + QRegularExpression regex(macros.join("|")); + for (auto block = document->firstBlock(); block.isValid(); block = block.next()) { + QRegularExpressionMatch match; + auto searchFrom = 0; + auto index = block.text().indexOf(regex, searchFrom, &match); + + // Run this in a loop to support multiple macros on the same line. + while (index != -1) { + // We need to construct a range from the end of the last match to the start of the current match. + // + // Note that the ranges have an inclusive start and an exclusive end. + // + // Also note that the column seems to be in bytes, not characters. + // This is why we multiply by sizeof(QChar) to get the correct column. + // At least that's what the TreeSitterInspector shows us. + auto endPoint = treesitter::Point {.row = static_cast(block.blockNumber()), + .column = static_cast(index * sizeof(QChar))}; + ranges.push_back({.start_point = lastPoint, + .end_point = endPoint, + .start_byte = lastByte, + // No need to add - 1 here, the ranges are exclusive at the end. 
+ .end_byte = static_cast((block.position() + index) * sizeof(QChar))}); + + auto matchLength = match.capturedLength(); + lastByte = static_cast((block.position() + index + matchLength) * sizeof(QChar)); + lastPoint = {.row = static_cast(block.blockNumber()), + .column = static_cast((index + matchLength) * sizeof(QChar))}; + if (lastPoint.column == static_cast(block.length())) { + ++lastPoint.row; + lastPoint.column = 0; + } + + searchFrom = index + matchLength; + index = block.text().indexOf(regex, searchFrom, &match); + } + } + + if (!ranges.isEmpty()) { + // Add the last range, up to the end of the document, but only if we have another range. + // Leaving the ranges empty will parse the entire document, so that's easiest. + auto endPoint = + treesitter::Point {.row = static_cast(document->blockCount() - 1), + .column = static_cast(document->lastBlock().length() * sizeof(QChar))}; + ranges.push_back({.start_point = lastPoint, + .end_point = endPoint, + .start_byte = lastByte, + .end_byte = static_cast(document->characterCount() * sizeof(QChar))}); + } + + return ranges; +} + } // namespace Core diff --git a/src/core/cppdocument.h b/src/core/cppdocument.h index 00e021db..cab1f701 100644 --- a/src/core/cppdocument.h +++ b/src/core/cppdocument.h @@ -60,6 +60,8 @@ class CppDocument : public CodeDocument bool changeBaseClass(CppDocument *header, CppDocument *source, const QString &className, const QString &newClassBaseName); + QList includedRanges() const override; + public slots: Core::CppDocument *openHeaderSource(); diff --git a/src/gui/treesitterinspector.cpp b/src/gui/treesitterinspector.cpp index 944cc479..49310a49 100644 --- a/src/gui/treesitterinspector.cpp +++ b/src/gui/treesitterinspector.cpp @@ -182,6 +182,7 @@ void TreeSitterInspector::changeText() Core::LoggerDisabler disableLogging; text = m_document->text(); } + m_parser.setIncludedRanges(m_document->includedRanges()); auto tree = m_parser.parseString(text); if (tree.has_value()) { 
m_treemodel.setTree(std::move(tree.value()), makePredicates(), ui->enableUnnamed->isChecked()); diff --git a/src/gui/treesittertreemodel.cpp b/src/gui/treesittertreemodel.cpp index 9341a8de..d38acc82 100644 --- a/src/gui/treesittertreemodel.cpp +++ b/src/gui/treesittertreemodel.cpp @@ -74,11 +74,13 @@ QVariant TreeSitterTreeModel::TreeNode::data(int column) const return QString("%1: %2").arg(fieldName, m_node.type()); } case 1: - return QString("[%1:%2] - [%3:%4]") + return QString("[%1:%2](%3) - [%4:%5](%6)") .arg(m_node.startPoint().row) .arg(m_node.startPoint().column) + .arg(m_node.startPosition()) .arg(m_node.endPoint().row) - .arg(m_node.endPoint().column); + .arg(m_node.endPoint().column) + .arg(m_node.endPosition()); default: break; } diff --git a/test_data/tst_cppdocument/treesitterExcludesMacros/AFX_EXT_CLASS.h b/test_data/tst_cppdocument/treesitterExcludesMacros/AFX_EXT_CLASS.h new file mode 100644 index 00000000..4c6df75c --- /dev/null +++ b/test_data/tst_cppdocument/treesitterExcludesMacros/AFX_EXT_CLASS.h @@ -0,0 +1,10 @@ +#pragma once + +class AFX_EXT_CLASS TestClass : public AFX_EXT_CLASSBase{ +publicAFX_EXT_CLASS: + void testMethod(); + +private: + int AFX_EXT_CLASS m_count; +}; +AFX_EXT_CLASS diff --git a/tests/tst_cppdocument_treesitter.cpp b/tests/tst_cppdocument_treesitter.cpp index b250a634..5ae65991 100644 --- a/tests/tst_cppdocument_treesitter.cpp +++ b/tests/tst_cppdocument_treesitter.cpp @@ -358,6 +358,29 @@ private slots: QVERIFY(headerFile.compare()); } } + + void excludeMacros() + { + Test::testCppDocument("tst_cppdocument/treesitterExcludesMacros", "AFX_EXT_CLASS.h", [](auto *document) { + auto match = document->queryClassDefinition("TestClass"); + QVERIFY(!match.isEmpty()); + QCOMPARE(match.get("name").text(), "TestClass"); + QCOMPARE(match.get("base").text(), "Base"); + + match = document->queryMember("TestClass", "m_count"); + QVERIFY(!match.isEmpty()); + QCOMPARE(match.get("name").text(), "m_count"); + 
QCOMPARE(match.get("type").text(), "int"); + QCOMPARE(match.get("member").text(), "int AFX_EXT_CLASS m_count;"); + + auto matches = document->queryMethodDeclaration("TestClass", "testMethod"); + QCOMPARE(matches.length(), 1); + match = matches.front(); + QVERIFY(!match.isEmpty()); + QCOMPARE(match.get("name").text(), "testMethod"); + QCOMPARE(match.get("return-type").text(), "void"); + }); + } }; QTEST_MAIN(TestCppDocumentTreeSitter)