From dda1bd27d39dba10e5fad8abf2dbc8779ca9cc7e Mon Sep 17 00:00:00 2001 From: leiyre Date: Fri, 21 Jan 2022 18:48:40 +0100 Subject: [PATCH] fix(#1015): manage emojis in Token Classification records (#1016) This PR adds an external library to manage emojis that are represented by several characters and cause annotation errors. Closes #1015 --- frontend/components/token-classifier/results/TextSpan.vue | 7 +++++-- frontend/models/TokenClassification.js | 5 +++-- frontend/package.json | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/frontend/components/token-classifier/results/TextSpan.vue b/frontend/components/token-classifier/results/TextSpan.vue index 809e5d7067..f512a4daa1 100755 --- a/frontend/components/token-classifier/results/TextSpan.vue +++ b/frontend/components/token-classifier/results/TextSpan.vue @@ -76,6 +76,7 @@ import ClickOutside from "v-click-outside"; import "assets/icons/check"; import "assets/icons/cross"; +import { substring } from "stringz"; export default { directives: { @@ -113,13 +114,15 @@ export default { return this.spans[this.spanId]; }, text() { - return this.record.raw_text.slice( + return substring( + this.record.raw_text, this.spans[this.spanId].start, this.spans[this.spanId].end ); }, whiteSpace() { - return this.record.raw_text.slice( + return substring( + this.record.raw_text, this.spans[this.spanId].end, this.spans[this.spanId + 1] ? 
this.spans[this.spanId + 1].start : "" ); diff --git a/frontend/models/TokenClassification.js b/frontend/models/TokenClassification.js index ac25286318..2473095fbc 100644 --- a/frontend/models/TokenClassification.js +++ b/frontend/models/TokenClassification.js @@ -17,6 +17,7 @@ import { ObservationDataset, USER_DATA_METADATA_KEY } from "./Dataset"; import { BaseRecord, BaseSearchQuery, BaseSearchResults } from "./Common"; +import { indexOf, length } from "stringz"; class TokenClassificationRecord extends BaseRecord { tokens; @@ -28,8 +29,8 @@ class TokenClassificationRecord extends BaseRecord { super({ ...superData }); const { visualTokens } = tokens.reduce( ({ visualTokens, startPosition }, token) => { - const start = raw_text.indexOf(token, startPosition); - const end = start + token.length; + const start = indexOf(raw_text, token, startPosition); + const end = start + length(token); return { visualTokens: [...visualTokens, { start, end, text: token }], startPosition: end, diff --git a/frontend/package.json b/frontend/package.json index 2d16b3dcd2..06b792d6c4 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -33,6 +33,7 @@ "nuxt": "^2.14.6", "nuxt-highlightjs": "^1.0.1", "sass-loader": "^10.1.0", + "stringz": "^2.1.0", "v-click-outside": "^3.1.2", "vue-moment": "^4.1.0", "vue-svgicon": "^3.2.9", @@ -59,4 +60,4 @@ "prettier": "^2.2.1", "vue-jest": "^3.0.7" } -} \ No newline at end of file +}