Skip to content

Commit

Permalink
ICU-22908 MF2: Update spec tests and update implementation for recent…
Browse files Browse the repository at this point in the history
… spec changes

Updating the spec tests requires two implementation changes:
* Match on variables rather than expressions --
  see unicode-org/message-format-wg#877
* Require attribute values to be literals (if present) --
  see unicode-org/message-format-wg#894

This updates the spec tests to commit 6c3704f41a9c24427126429fb43992b03609dfc8
in https://github.com/unicode-org/message-format-wg/ . Any changes
following that commit will be addressed in a future PR.
  • Loading branch information
catamorphism committed Dec 9, 2024
1 parent 2e57f07 commit 62b95bb
Show file tree
Hide file tree
Showing 32 changed files with 551 additions and 500 deletions.
24 changes: 18 additions & 6 deletions icu4c/source/i18n/messageformat2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,13 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme
CHECK_ERROR(status);
U_ASSERT(!dataModel.hasPattern());

const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();
// 1. Let res be a new empty list of resolved values that support selection.
// (Implicit, since `res` is an out-parameter)
// 2. For each expression exp of the message's selectors
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
// 2i. Let rv be the resolved value of exp.
ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status);
ResolvedSelector rv = formatSelector(env, selectors[i], context, status);
if (rv.hasSelector()) {
// 2ii. If selection is supported for rv:
// (True if this code has been reached)
Expand Down Expand Up @@ -614,7 +614,10 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo


// Evaluate the operand
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
const Operand& rand,
MessageContext& context,
UErrorCode &status) const {
if (U_FAILURE(status)) {
return {};
}
Expand All @@ -628,7 +631,13 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, cons
}

// Must be variable
const VariableName& var = rand.asVariable();
return resolveVariables(env, rand.asVariable(), context, status);
}

ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
const VariableName& var,
MessageContext& context,
UErrorCode &status) const {
// Resolve the variable
if (env.has(var)) {
const Closure& referent = env.lookup(var);
Expand Down Expand Up @@ -691,13 +700,16 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
}
}

ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
ResolvedSelector MessageFormatter::formatSelector(const Environment& globalEnv,
const VariableName& var,
MessageContext& context,
UErrorCode &status) const {
if (U_FAILURE(status)) {
return {};
}

// Resolve expression to determine if it's a function call
ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status);
ResolvedSelector exprResult = resolveVariables(globalEnv, var, context, status);

DynamicErrors& err = context.getErrors();

Expand Down
21 changes: 9 additions & 12 deletions icu4c/source/i18n/messageformat2_checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,12 @@ TypeEnvironment::~TypeEnvironment() {}

// ---------------------

UnicodeString Checker::normalizeNFC(const Key& k) const {
Key Checker::normalizeNFC(const Key& k) const {
if (k.isWildcard()) {
return UnicodeString("*");
return k;
}
return context.normalizeNFC(k.asLiteral().unquoted());
return Key(Literal(k.asLiteral().isQuoted(),
context.normalizeNFC(k.asLiteral().unquoted())));
}

static bool areDefaultKeys(const Key* keys, int32_t len) {
Expand Down Expand Up @@ -216,18 +217,14 @@ void Checker::checkVariants(UErrorCode& status) {
}
}

void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& status) {
void Checker::requireAnnotated(const TypeEnvironment& t,
const VariableName& selectorVar,
UErrorCode& status) {
CHECK_ERROR(status);

if (selectorExpr.isFunctionCall()) {
if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) {
return; // No error
}
const Operand& rand = selectorExpr.getOperand();
if (rand.isVariable()) {
if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) {
return; // No error
}
}
// If this code is reached, an error was detected
errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
}
Expand All @@ -237,7 +234,7 @@ void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {

// Check each selector; if it's not annotated, emit a
// "missing selector annotation" error
const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
requireAnnotated(t, selectors[i], status);
}
Expand Down
4 changes: 2 additions & 2 deletions icu4c/source/i18n/messageformat2_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ namespace message2 {
: dataModel(d), errors(e), context(mf) {}
private:

UnicodeString normalizeNFC(const Key&) const;
Key normalizeNFC(const Key&) const;

void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
void requireAnnotated(const TypeEnvironment&, const VariableName&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const OptionMap&, UErrorCode&);
Expand Down
26 changes: 13 additions & 13 deletions icu4c/source/i18n/messageformat2_data_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,9 +693,9 @@ Matcher::Matcher(const Matcher& other) {
numSelectors = other.numSelectors;
numVariants = other.numVariants;
UErrorCode localErrorCode = U_ZERO_ERROR;
selectors.adoptInstead(copyArray<Expression>(other.selectors.getAlias(),
numSelectors,
localErrorCode));
selectors.adoptInstead(copyArray<VariableName>(other.selectors.getAlias(),
numSelectors,
localErrorCode));
variants.adoptInstead(copyArray<Variant>(other.variants.getAlias(),
numVariants,
localErrorCode));
Expand All @@ -704,7 +704,7 @@ Matcher::Matcher(const Matcher& other) {
}
}

Matcher::Matcher(Expression* ss, int32_t ns, Variant* vs, int32_t nv)
Matcher::Matcher(VariableName* ss, int32_t ns, Variant* vs, int32_t nv)
: selectors(ss), numSelectors(ns), variants(vs), numVariants(nv) {}

Matcher::~Matcher() {}
Expand All @@ -726,7 +726,7 @@ const Binding* MFDataModel::getLocalVariablesInternal() const {
return bindings.getAlias();
}

const Expression* MFDataModel::getSelectorsInternal() const {
const VariableName* MFDataModel::getSelectorsInternal() const {
U_ASSERT(!bogus);
U_ASSERT(!hasPattern());
return std::get_if<Matcher>(&body)->selectors.getAlias();
Expand Down Expand Up @@ -788,15 +788,13 @@ MFDataModel::Builder& MFDataModel::Builder::addBinding(Binding&& b, UErrorCode&
return *this;
}

/*
selector must be non-null
*/
MFDataModel::Builder& MFDataModel::Builder::addSelector(Expression&& selector, UErrorCode& status) noexcept {
MFDataModel::Builder& MFDataModel::Builder::addSelector(VariableName&& selector,
UErrorCode& status) {
THIS_ON_ERROR(status);

buildSelectorsMessage(status);
U_ASSERT(selectors != nullptr);
selectors->adoptElement(create<Expression>(std::move(selector), status), status);
selectors->adoptElement(create<VariableName>(std::move(selector), status), status);

return *this;
}
Expand Down Expand Up @@ -832,11 +830,11 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
if (other.hasPattern()) {
body = *std::get_if<Pattern>(&other.body);
} else {
const Expression* otherSelectors = other.getSelectorsInternal();
const VariableName* otherSelectors = other.getSelectorsInternal();
const Variant* otherVariants = other.getVariantsInternal();
int32_t numSelectors = other.numSelectors();
int32_t numVariants = other.numVariants();
Expression* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
VariableName* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
Variant* copiedVariants = copyArray(otherVariants, numVariants, localErrorCode);
if (U_FAILURE(localErrorCode)) {
bogus = true;
Expand Down Expand Up @@ -865,7 +863,9 @@ MFDataModel::MFDataModel(const MFDataModel::Builder& builder, UErrorCode& errorC
int32_t numVariants = builder.variants->size();
int32_t numSelectors = builder.selectors->size();
LocalArray<Variant> variants(copyVectorToArray<Variant>(*builder.variants, errorCode), errorCode);
LocalArray<Expression> selectors(copyVectorToArray<Expression>(*builder.selectors, errorCode), errorCode);
LocalArray<VariableName> selectors(copyVectorToArray<VariableName>(*builder.selectors,
errorCode),
errorCode);
if (U_FAILURE(errorCode)) {
bogus = true;
return;
Expand Down
95 changes: 49 additions & 46 deletions icu4c/source/i18n/messageformat2_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,21 +512,13 @@ VariableName Parser::parseVariableName(UErrorCode& errorCode) {
VariableName result;

U_ASSERT(inBounds());
// If the '$' is missing, we don't want a binding
// for this variable to be created.
bool valid = peek() == DOLLAR;

parseToken(DOLLAR, errorCode);
if (!inBounds()) {
ERROR(errorCode);
return result;
}
UnicodeString varName = parseName(errorCode);
// Set the name to "" if the variable wasn't
// declared correctly
if (!valid) {
varName.remove();
}
return VariableName(varName);
return VariableName(parseName(errorCode));
}

/*
Expand Down Expand Up @@ -863,27 +855,17 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
parseTokenWithWhitespace(EQUALS, errorCode);

UnicodeString rhsStr;
// Parse RHS, which is either a literal or variable
switch (peek()) {
case DOLLAR: {
rand = Operand(parseVariableName(errorCode));
break;
}
default: {
// Must be a literal
rand = Operand(parseLiteral(errorCode));
break;
}
}
U_ASSERT(!rand.isNull());
// Parse RHS, which must be a literal
// attribute = "@" identifier [o "=" o literal]
rand = Operand(parseLiteral(errorCode));
} else {
// attribute -> "@" identifier [[s] "=" [s]]
// Use null operand, which `rand` is already set to
// "Backtrack" by restoring the whitespace (if there was any)
index = savedIndex;
}

attrAdder.addAttribute(lhs, std::move(rand), errorCode);
attrAdder.addAttribute(lhs, std::move(Operand(rand)), errorCode);
}

/*
Expand Down Expand Up @@ -1722,6 +1704,22 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
return result.build(status);
}

void Parser::parseVariant(UErrorCode& status) {
CHECK_ERROR(status);

// At least one key is required
SelectorKeys keyList(parseNonEmptyKeys(status));

// parseNonEmptyKeys() consumes any trailing whitespace,
// so the pattern can be consumed next.

// Restore precondition before calling parsePattern()
// (which must return a non-null value)
CHECK_BOUNDS(status);
Pattern rhs = parseQuotedPattern(status);

dataModel.addVariant(std::move(keyList), std::move(rhs), status);
}

/*
Consume a `selectors` (matching the nonterminal in the grammar),
Expand All @@ -1741,22 +1739,25 @@ void Parser::parseSelectors(UErrorCode& status) {
// Parse selectors
// "Backtracking" is required here. It's not clear if whitespace is
// (`[s]` selector) or (`[s]` variant)
while (isWhitespace(peek()) || peek() == LEFT_CURLY_BRACE) {
parseOptionalWhitespace(status);
while (isWhitespace(peek()) || peek() == DOLLAR) {
int32_t whitespaceStart = index;
parseRequiredWhitespace(status);
// Restore precondition
CHECK_BOUNDS(status);
if (peek() != LEFT_CURLY_BRACE) {
if (peek() != DOLLAR) {
// This is not necessarily an error, but rather,
// means the whitespace we parsed was the optional
// whitespace preceding the first variant, not the
// optional whitespace preceding a subsequent expression.
// required whitespace preceding a subsequent variable.
// In that case, "push back" the whitespace.
normalizedInput.truncate(normalizedInput.length() - 1);
index = whitespaceStart;
break;
}
Expression expression;
expression = parseExpression(status);
VariableName var = parseVariableName(status);
empty = false;

dataModel.addSelector(std::move(expression), status);
dataModel.addSelector(std::move(var), status);
CHECK_ERROR(status);
}

Expand All @@ -1772,27 +1773,29 @@ void Parser::parseSelectors(UErrorCode& status) {
} \

// Parse variants
// matcher = match-statement s variant *(o variant)

// Parse first variant
parseRequiredWhitespace(status);
if (!inBounds()) {
ERROR(status);
return;
}
parseVariant(status);
if (!inBounds()) {
// Not an error; there might be only one variant
return;
}

while (isWhitespace(peek()) || isKeyStart(peek())) {
// Trailing whitespace is allowed
parseOptionalWhitespace(status);
// Restore the precondition.
// Trailing whitespace is allowed.
if (!inBounds()) {
return;
}

// At least one key is required
SelectorKeys keyList(parseNonEmptyKeys(status));

CHECK_ERROR(status);

// parseNonEmptyKeys() consumes any trailing whitespace,
// so the pattern can be consumed next.

// Restore precondition before calling parsePattern()
// (which must return a non-null value)
CHECK_BOUNDS(status);
Pattern rhs = parseQuotedPattern(status);

dataModel.addVariant(std::move(keyList), std::move(rhs), status);
parseVariant(status);

// Restore the precondition, *without* erroring out if we've
// reached the end of input. That's because it's valid for the
Expand Down
3 changes: 2 additions & 1 deletion icu4c/source/i18n/messageformat2_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ namespace message2 {
void parseUnsupportedStatement(UErrorCode&);
void parseLocalDeclaration(UErrorCode&);
void parseInputDeclaration(UErrorCode&);
void parseSelectors(UErrorCode&);
void parseSelectors(UErrorCode&);
void parseVariant(UErrorCode&);

void parseWhitespaceMaybeRequired(bool, UErrorCode&);
void parseRequiredWhitespace(UErrorCode&);
Expand Down
6 changes: 4 additions & 2 deletions icu4c/source/i18n/messageformat2_serializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,18 +246,20 @@ void Serializer::serializeDeclarations() {

void Serializer::serializeSelectors() {
U_ASSERT(!dataModel.hasPattern());
const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();

emit(ID_MATCH);
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
// No whitespace needed here -- see `selectors` in the grammar
whitespace();
emit(DOLLAR);
emit(selectors[i]);
}
}

void Serializer::serializeVariants() {
U_ASSERT(!dataModel.hasPattern());
const Variant* variants = dataModel.getVariantsInternal();
whitespace();
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
const Variant& v = variants[i];
emit(v.getKeys());
Expand Down
Loading

0 comments on commit 62b95bb

Please sign in to comment.