Skip to content

Commit

Permalink
feat(developer): hint/warn on range check 🙀
Browse files Browse the repository at this point in the history
- add warning into tran.ts - there will be one warning per <transform> element

#10316
  • Loading branch information
srl295 committed Feb 2, 2024
1 parent 10940be commit 80ad3a8
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 7 deletions.
4 changes: 2 additions & 2 deletions developer/src/kmc-ldml/src/compiler/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,11 @@ export class CompilerMessages {

/**
 * Builds the hint message for character classes that span non-NFD characters.
 * The message text includes `o.lowestCh` formatted by `util.describeCodepoint`,
 * and is passed to `m` together with the `HINT_CharClassImplicitDenorm` code.
 *
 * @param o.lowestCh code point to mention in the message text
 */
static Hint_CharClassImplicitDenorm = (o: { lowestCh: number }) =>
m(this.HINT_CharClassImplicitDenorm, `File has character classes which span non-NFD character(s), including ${util.describeCodepoint(o.lowestCh)}. These will not match any text.`);
// Message code: `SevHint` combined with a message number.
// NOTE(review): the identifier is assigned on two consecutive lines here
// (0x0023, then 0x0026) — confirm which value is current.
static HINT_CharClassImplicitDenorm = SevHint | 0x0023;
static HINT_CharClassImplicitDenorm = SevHint | 0x0026;

/**
 * Builds the warning message for character classes that include non-NFD characters.
 * The message text includes `o.lowestCh` formatted by `util.describeCodepoint`,
 * and is passed to `m` together with the `WARN_CharClassExplicitDenorm` code.
 *
 * @param o.lowestCh code point to mention in the message text
 */
static Warn_CharClassExplicitDenorm = (o: { lowestCh: number }) =>
m(this.WARN_CharClassExplicitDenorm, `File has character classes which include non-NFD characters(s), including ${util.describeCodepoint(o.lowestCh)}. These will not match any text.`);
// Message code: `SevWarn` combined with a message number.
// NOTE(review): the identifier is assigned on two consecutive lines here
// (0x0024, then 0x0027) — confirm which value is current.
static WARN_CharClassExplicitDenorm = SevWarn | 0x0024;
static WARN_CharClassExplicitDenorm = SevWarn | 0x0027;

}

Expand Down
60 changes: 60 additions & 0 deletions developer/src/kmc-ldml/src/compiler/tran.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ export abstract class TransformCompiler<T extends TransformCompilerType, TranBas
// don't unescape literals here, because we are going to pass them through into the regex
cookedFrom = util.unescapeStringToRegex(cookedFrom);

this.checkRanges(cookedFrom); // check before normalizing

if (!sections.meta.normalizionDisabled) {
// nfd here.
cookedFrom = MarkerParser.nfd_markers(cookedFrom, true);
Expand Down Expand Up @@ -197,6 +199,64 @@ export abstract class TransformCompiler<T extends TransformCompilerType, TranBas
defaults.delete(this.id);
return defaults;
}

/**
 * Scans a transform's `from` pattern for character classes (`[...]`) and
 * reports when they involve denormalized (non-NFD) text.
 *
 * - If any character written explicitly in a class (a single member or a
 *   range endpoint) is denormalized, one `Warn_CharClassExplicitDenorm` is
 *   reported.
 * - Otherwise, if any code point that is only implied by a `start-end` range
 *   is denormalized, one `Hint_CharClassImplicitDenorm` is reported.
 *
 * At most one message is reported per call. Classes immediately preceded by
 * the `\uffff\u0008` marker prefix are not inspected.
 *
 * @param cookedFrom the `from` pattern in regex-escaped form, before NFD
 *                   normalization is applied to it
 */
private checkRanges(cookedFrom: string) {
  // extract all of the potential ranges - but don't match any-markers!
  const anyRange = /(?<!\\uffff\\u0008)\[([^\]]+)\]/g;

  // Extract the members of a class (the text inside the []).
  // A member is a braced escape (\u{...}), a 4-digit escape (\uXXXX), or any
  // single code point. Escapes must be accepted both as range endpoints AND
  // as stand-alone members: otherwise a lone `\u00e1` would be split into
  // the characters `\`, `u`, `0`, ... and never be decoded or analyzed.
  // The `u` flag makes `.` consume a whole supplementary code point instead
  // of half of a surrogate pair.
  const rangeRegex = /(\\u\{[0-9a-fA-F]+\}|\\u[0-9a-fA-F]{4}|.)-(\\u\{[0-9a-fA-F]+\}|\\u[0-9a-fA-F]{4}|.)|\\u\{[0-9a-fA-F]+\}|\\u[0-9a-fA-F]{4}|./gu;

  const rangeExplicit = new util.NFDAnalyzer();
  const rangeImplicit = new util.NFDAnalyzer();

  /** decode an explicitly written member, record it, and return the decoded text */
  function processExplicit(s: string) {
    if (s.startsWith('\\u{')) {
      s = util.unescapeString(s);
    } else if (s.startsWith('\\u')) {
      s = util.unescapeOneQuadString(s);
    }
    rangeExplicit.add(s);
    return s;
  }

  // matchAll() always returns an iterator (possibly empty), so no
  // "no matches" guard is needed before looping.
  for (const [, sub] of cookedFrom.matchAll(anyRange)) {
    for (const [all, start, end] of sub.matchAll(rangeRegex)) {
      if (!start && !end) {
        // explicit single member
        processExplicit(all);
        continue;
      }

      // start-end range - the endpoints are explicit
      const sch = processExplicit(start).codePointAt(0);
      const ech = processExplicit(end).codePointAt(0);
      if (sch === undefined || ech === undefined) {
        // an endpoint decoded to an empty string; nothing to enumerate
        continue;
      }

      // Only the code points strictly between the endpoints are implicit;
      // the endpoints themselves were already recorded as explicit.
      for (let n = sch + 1; n < ech; n++) {
        rangeImplicit.add(String.fromCodePoint(n));
      }
    }
  }

  // analyze ranges
  const explicitSet = rangeExplicit.analyze()?.get(util.BadStringType.denormalized);
  if (explicitSet) {
    // NOTE(review): this reports the first value yielded by the set; confirm
    // that the analyzer yields values in ascending order if "lowest" must hold.
    this.callbacks.reportMessage(CompilerMessages.Warn_CharClassExplicitDenorm({ lowestCh: explicitSet.values().next().value }));
    return;
  }

  // don't analyze the implicit set if the explicit set is already problematic
  const implicitSet = rangeImplicit.analyze()?.get(util.BadStringType.denormalized);
  if (implicitSet) {
    this.callbacks.reportMessage(CompilerMessages.Hint_CharClassImplicitDenorm({ lowestCh: implicitSet.values().next().value }));
  }
}
}

export class TranCompiler extends TransformCompiler<'simple', Tran /*, TranItem*/> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<transforms type="simple">
<transformGroup>
<!-- all NFD but crosses non-NFD-->
<transform from="[\u{0020}-\u{01ff}]" to="x" />
<transform from="[\u{0020}-\u{0250}]" to="x" />
</transformGroup>
</transforms>
</keyboard3>
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

<transforms type="simple">
<transformGroup>
<!-- warning, includes NFD start/end-->
<!-- warning, includes NFC start/end-->
<transform from="[á-é]" to="x" />
</transformGroup>
</transforms>
Expand Down
6 changes: 3 additions & 3 deletions developer/src/kmc-ldml/test/test-tran.ts
Original file line number Diff line number Diff line change
Expand Up @@ -248,19 +248,19 @@ describe('tran', function () {
{
subpath: 'sections/tran/tran-warn-range.xml',
warnings: [
CompilerMessages.Warn_CharClassExplicitDenorm({lowestCh: 0}),
CompilerMessages.Warn_CharClassExplicitDenorm({lowestCh: 0xE1}),
],
},
{
subpath: 'sections/tran/tran-hint-range.xml',
warnings: [
CompilerMessages.Hint_CharClassImplicitDenorm({lowestCh: 0}),
CompilerMessages.Hint_CharClassImplicitDenorm({lowestCh: 0xc0}),
],
},
{
subpath: 'sections/tran/tran-hint-range2.xml',
warnings: [
CompilerMessages.Hint_CharClassImplicitDenorm({lowestCh: 0}),
CompilerMessages.Hint_CharClassImplicitDenorm({lowestCh: 0xC0}),
],
},
], tranDependencies);
Expand Down

0 comments on commit 80ad3a8

Please sign in to comment.