-
Notifications
You must be signed in to change notification settings - Fork 95
Closed
Description
The function `FillNextTokenBitmask` becomes slower and slower on each call when using an EBNF grammar.
- grammar
std::string ebnf_test = R"(
root ::= en-char+ ([ \t\n] en-char+)*
en-char ::= [0-9a-zA-Z!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~]
)";
- code
// Reproduction loop for the slowdown: keeps decoding until the matcher reports
// termination (or one of the break conditions below fires) and logs how long
// FillNextTokenBitmask takes on every iteration.
while (!matcher.IsTerminated()) {
// Build a fresh logits vector and fill every entry with a value drawn from dist(gen).
Vectorxf logits = Vectorxf::Zero(full_vocab_size);
for (int i = 0; i < full_vocab_size; i++) {
logits[i] = dist(gen);
}
auto logits_dl = create_DLTensor_float(logits.data(), logits_shape);
// Fresh bitmask buffer each iteration, every element initialized to -1.
Vectorxi32 bitmask = Vectorxi32::Constant(len, -1);
// Wrap the bitmask buffer in a DLTensor on the CPU device; the tensor points at
// bitmask.data() directly, so the buffer must outlive bitmask_dltensor.
// NOTE(review): the trailing nullptr / 0 are presumably strides and byte_offset
// per the DLPack field order — confirm against the DLPack header in use.
DLTensor bitmask_dltensor{reinterpret_cast<void *>(bitmask.data()),
DLDevice{kDLCPU, 0},
static_cast<int32_t>(bitmask_shape.size()),
xgrammar::GetBitmaskDLType(),
bitmask_shape.data(),
nullptr,
0};
// The stopwatch brackets only the FillNextTokenBitmask call; this is the
// measurement the issue reports as growing over iterations.
spdlog::stopwatch sw;
matcher.FillNextTokenBitmask(&bitmask_dltensor, 0);
spdlog::info("iter {} , FillNextTokenBitmask spent: {} s", decoded.size(), sw);
xgrammar::ApplyTokenBitmaskInplaceCPU(&logits_dl, bitmask_dltensor);
// Greedy sampling: pick the index of the largest logit after masking.
size_t next;
logits.maxCoeff(&next);
// The token is handed to the matcher before the break checks below, so the
// final token is accepted by the matcher but never appended to `decoded`.
matcher.AcceptToken((int32_t) next, false);
// NOTE(review): 151645 is presumably the tokenizer's end-of-sequence token id — confirm.
if (next == 151645)
break;
// Hard cap on the number of decoded tokens so the loop always ends.
if (decoded.size() > 1024)
break;
decoded.push_back((int32_t) next);
}
Metadata
Metadata
Assignees
Labels
No labels

