Skip to content

Commit

Permalink
拼写检查支持检查字符串中的内容
Browse files Browse the repository at this point in the history
  • Loading branch information
CppCXY committed May 26, 2022
1 parent 219767f commit 1bdfe5a
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 10 deletions.
62 changes: 61 additions & 1 deletion CodeService/src/Spell/CodeSpellChecker.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "CodeService/Spell/CodeSpellChecker.h"
#include "LuaParser/LuaTokenTypeDetail.h"
#include "Util/format.h"
#include "CodeService/Spell/TextParser.h"

CodeSpellChecker::CodeSpellChecker()
: _symSpell(std::make_shared<SymSpell>(SymSpell::Strategy::LazyLoaded))
Expand Down Expand Up @@ -29,6 +30,10 @@ void CodeSpellChecker::Analysis(DiagnosisContext& ctx, const CustomDictionary& c
{
IdentifyAnalysis(ctx, token, customDict);
}
else if (token.TokenType == TK_STRING)
{
TextAnalysis(ctx, token, customDict);
}
}
}

Expand Down Expand Up @@ -182,7 +187,62 @@ void CodeSpellChecker::IdentifyAnalysis(DiagnosisContext& ctx, LuaToken& token,
token.Range.StartOffset + word.Range.Start + word.Range.Count - 1
);
std::string originText(token.Text.substr(word.Range.Start, word.Range.Count));
ctx.PushDiagnosis(Util::format("Typo in identifier '{}'", originText), range, DiagnosisType::Spell, originText);
ctx.PushDiagnosis(Util::format("Typo in identifier '{}'", originText), range, DiagnosisType::Spell,
originText);
}
}
}

// Spell-checks the identifier-like runs found inside a string token.
//
// The token text is split into identifier runs by spell::TextParser; each run is
// then split into words by spell::IdentifyParser (parsers are cached per run text
// in _caches). Every non-empty word that SymSpell does not recognise and that is
// not in the custom dictionary is reported as a DiagnosisType::Spell diagnosis.
//
// ctx        - receives the diagnoses.
// token      - the string token to inspect; its Text and Range.StartOffset are read.
// customDict - words (or whole runs) that must never be reported.
void CodeSpellChecker::TextAnalysis(DiagnosisContext& ctx, LuaToken& token, const CustomDictionary& customDict)
{
    // The text parser is only needed for the duration of this call, so it is a
    // scoped object rather than a shared_ptr-managed heap allocation.
    spell::TextParser parser(token.Text);
    parser.Parse();

    for (auto& identifier : parser.GetIdentifiers())
    {
        auto& text = identifier.Item;

        // A run that is whitelisted as a whole is skipped without splitting it.
        if (customDict.count(text) != 0)
        {
            continue;
        }

        // Reuse a previously parsed word split for this exact run when available.
        // NOTE(review): the cached parser is built from `text`, which is owned by the
        // local TextParser. If IdentifyParser keeps a view of its input instead of a
        // copy, cached entries would outlive that storage - confirm.
        std::shared_ptr<spell::IdentifyParser> identifierParser;
        if (auto cached = _caches.find(text); cached != _caches.end())
        {
            identifierParser = cached->second;
        }
        else
        {
            identifierParser = std::make_shared<spell::IdentifyParser>(text);
            identifierParser->Parse();
            _caches.emplace(text, identifierParser);
        }

        for (auto& word : identifierParser->GetWords())
        {
            if (word.Item.empty() || _symSpell->IsCorrectWord(word.Item) || customDict.count(word.Item) != 0)
            {
                continue;
            }

            // Word ranges are relative to the run, and run ranges are relative to the
            // token text, so both are added to get the position inside the token.
            const auto offsetInToken = identifier.Range.Start + word.Range.Start;
            auto range = TextRange(token.Range.StartOffset + offsetInToken,
                                   token.Range.StartOffset + offsetInToken + word.Range.Count - 1);

            // Report the text exactly as written in the source, not the parsed word.
            std::string originText(token.Text.substr(offsetInToken, word.Range.Count));
            ctx.PushDiagnosis(Util::format("Typo in string '{}'", originText), range, DiagnosisType::Spell,
                              originText);
        }
    }
}
6 changes: 6 additions & 0 deletions CodeService/src/Spell/IdentifyParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,12 @@ int IdentifyParser::GetCurrentChar()

void IdentifyParser::PushWords(WordRange range)
{
// Ranges this short are most likely abbreviations, so they are not recorded.
if (range.Count <= 3)
{
return;
}

std::string_view wordView = _source.substr(range.Start, range.Count);
std::string word;
word.resize(wordView.size());
Expand Down
68 changes: 67 additions & 1 deletion CodeService/src/Spell/TextParser.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,70 @@
#include "CodeService/Spell/TextParser.h"
#include "CodeService/Spell/IdentifyParser.h"

using namespace spell;
// TODO implement later

// Reports whether `ch` can be part of an identifier run: a character with a
// positive value that is either an underscore or alphanumeric.
bool IsIdentifier(char ch)
{
    // Zero and (where char is signed) negative values are rejected before they
    // can reach std::isalnum.
    if (ch <= 0)
    {
        return false;
    }
    if (ch == '_')
    {
        return true;
    }
    return std::isalnum(ch) != 0;
}

TextParser::TextParser(std::string_view source)
: _source(source)
{
}

// Scans the source once and hands every maximal run of identifier characters
// (as decided by IsIdentifier) to PushIdentifier as a (start, length) range.
void TextParser::Parse()
{
    const std::size_t length = _source.size();
    bool insideRun = false;
    std::size_t runBegin = 0;

    for (std::size_t pos = 0; pos < length; ++pos)
    {
        const bool identifierChar = IsIdentifier(_source[pos]);

        if (insideRun)
        {
            // The first non-identifier character closes the current run.
            if (!identifierChar)
            {
                insideRun = false;
                PushIdentifier(WordRange(runBegin, pos - runBegin));
            }
        }
        else if (identifierChar)
        {
            // The first identifier character opens a new run.
            insideRun = true;
            runBegin = pos;
        }
    }

    // A run that reaches the end of the text has no closing character.
    if (insideRun)
    {
        PushIdentifier(WordRange(runBegin, length - runBegin));
    }
}

std::vector<Word>& TextParser::GetIdentifiers()
{
return _identifiers;
}

// Records the run described by `range` together with a copy of its text.
// Runs of three characters or fewer are not recorded.
void TextParser::PushIdentifier(spell::WordRange range)
{
    if (range.Count > 3)
    {
        _identifiers.emplace_back(range, std::string(_source.substr(range.Start, range.Count)));
    }
}
3 changes: 3 additions & 0 deletions include/CodeService/Spell/CodeSpellChecker.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ class CodeSpellChecker
private:
void IdentifyAnalysis(DiagnosisContext& ctx, LuaToken& token, const CustomDictionary& customDict);

void TextAnalysis(DiagnosisContext& ctx, LuaToken& token, const CustomDictionary& customDict);

std::shared_ptr<SymSpell> _symSpell;
std::unordered_map<std::string, std::shared_ptr<spell::IdentifyParser>> _caches;
};

14 changes: 6 additions & 8 deletions include/CodeService/Spell/TextParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@ namespace spell {
// Splits a piece of text into identifier-like runs (see Parse() and
// GetIdentifiers()) so that each run can be spell-checked separately.
//
// NOTE(review): this listing comes from a diff view and appears to interleave the
// declarations removed by the change (TextType, GetWords, Lex) with the ones it
// adds (GetIdentifiers, PushIdentifier, _identifiers) - confirm against the
// actual header before relying on any of the former.
class TextParser
{
public:
// NOTE(review): no use of TextType other than Lex() is visible here - presumably
// part of the earlier, unimplemented design; confirm.
enum class TextType
{
Unknown,
End,
};

// Stores a view of `source`; the text is not copied into the parser.
TextParser(std::string_view source);

// Scans the stored text and records the identifier runs it contains.
void Parse();

// NOTE(review): no definition of GetWords() is visible in TextParser.cpp - confirm
// whether this declaration still exists.
std::vector<spell::Word>& GetWords();
// Returns the runs recorded by Parse(), as a mutable reference.
std::vector<Word>& GetIdentifiers();

private:
// NOTE(review): no definition of Lex() is visible in TextParser.cpp - confirm
// whether this declaration still exists.
TextType Lex();


// Appends one run (range plus a copy of its text) to _identifiers.
void PushIdentifier(spell::WordRange range);
// Non-owning view of the text being parsed.
std::string_view _source;
// Runs collected so far, in the order they were pushed.
std::vector<Word> _identifiers;
};
}

0 comments on commit 1bdfe5a

Please sign in to comment.