Skip to content

Commit

Permalink
Merge pull request #208 from VisLab/update-tokenizer
Browse files Browse the repository at this point in the history
Rewrite of tokenizer and introduction of object-based test cases
  • Loading branch information
happy5214 authored Oct 29, 2024
2 parents 4e7d98e + 24f1ffc commit 9561c87
Show file tree
Hide file tree
Showing 33 changed files with 3,641 additions and 1,096 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
- name: Download dependencies
run: npm ci
- name: Test & publish code coverage
uses: paambaati/codeclimate-action@v8.0.0
uses: paambaati/codeclimate-action@v9.0.0
env:
CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
with:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dist/
spec_tests/*.txt
spec_tests/temp*.json
spec_tests/temp.spec.js
tests/temp.spec.js

# Unit test / coverage reports
htmlcov/
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# hed-validator

This package contains a JavaScript validator for HED (hierarchical event descriptor) strings.
This package contains a JavaScript validator for HED (Hierarchical Event Descriptor) strings.

[HED](https://www.hedtags.org/) is a system for annotating events using comma-separated path strings.
Any type of event can be annotated using HED-type syntax.
Expand Down Expand Up @@ -57,3 +57,8 @@ To use the `hed-validator`, you must install the npm `hed-validator` package and

A sample of current `hed-validator` usage can be found in the BIDS validator in
[`hed.js`](https://github.com/bids-standard/bids-validator/blob/5dfc3938ea8ce128c7db295e7bebc8eed2de1ce6/bids-validator/validators/hed.js).

## Repository notes

The `develop` branch is now the default branch. All changes to the repository should
be submitted as pull requests (PRs) against the `develop` branch.
8 changes: 6 additions & 2 deletions bids/validator/bidsHedTsvValidator.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ export class BidsHedTsvValidator {
* @private
*/
_validateHedColumn() {
if (this.tsvFile.hedColumnHedStrings.length === 0) {
// no HED column strings to validate
return []
}
return this.tsvFile.hedColumnHedStrings.flatMap((hedString, rowIndexMinusTwo) =>
this._validateHedColumnString(hedString, rowIndexMinusTwo + 2),
)
Expand Down Expand Up @@ -230,7 +234,6 @@ export class BidsHedTsvParser {
*/
_parseHedRows(tsvHedRows) {
const hedStrings = []

tsvHedRows.forEach((row, index) => {
const hedString = this._parseHedRow(row, index + 2)
if (hedString !== null) {
Expand All @@ -248,9 +251,9 @@ export class BidsHedTsvParser {
* @private
*/
_mergeEventRows(rowStrings) {
const eventStrings = []
const groupedTsvRows = groupBy(rowStrings, (rowString) => rowString.onset)
const sortedOnsetTimes = Array.from(groupedTsvRows.keys()).sort((a, b) => a - b)
const eventStrings = []
for (const onset of sortedOnsetTimes) {
const onsetRows = groupedTsvRows.get(onset)
const onsetEventString = new BidsTsvEvent(this.tsvFile, onsetRows)
Expand All @@ -275,6 +278,7 @@ export class BidsHedTsvParser {
hedStringParts.push(hedStringPart)
}
}
if (hedStringParts.length === 0) return null

const hedString = hedStringParts.join(',')

Expand Down
15 changes: 15 additions & 0 deletions common/issues/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,26 @@ export default {
level: 'error',
message: stringTemplate`Invalid tag - "${'tag'}".`,
},
extraSlash: {
hedCode: 'TAG_INVALID',
level: 'error',
message: stringTemplate`Tag has extra slash at index ${'index'} of string "${'string'}".`,
},
extraBlank: {
hedCode: 'TAG_INVALID',
level: 'error',
message: stringTemplate`Tag has extra blank at index ${'index'} of string "${'string'}".`,
},
extraCommaOrInvalid: {
hedCode: 'TAG_INVALID',
level: 'error',
message: stringTemplate`Either "${'previousTag'}" contains a comma when it should not or "${'tag'}" is not a valid tag.`,
},
invalidTagPrefix: {
hedCode: 'TAG_NAMESPACE_PREFIX_INVALID',
level: 'error',
message: stringTemplate`Either tag prefix at index ${'index'} contains non-alphabetic characters or does not have an associated schema.`,
},
multipleUniqueTags: {
hedCode: 'TAG_NOT_UNIQUE',
level: 'error',
Expand Down
30 changes: 25 additions & 5 deletions parser/columnSplicer.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,25 +111,36 @@ export class ColumnSplicer {
*/
_spliceTemplate(columnTemplate) {
const columnName = columnTemplate.originalTag

// HED column handled specially
if (columnName === 'HED') {
return this._spliceHedColumnTemplate()
}

// Not the HED column so treat as usual
const replacementString = this.columnReplacements.get(columnName)

// Handle null or undefined replacement strings
if (replacementString === null) {
return null
}
if (columnName === 'HED') {
return this._spliceHedColumnTemplate()
}
if (replacementString === undefined) {
this.issues.push(generateIssue('undefinedCurlyBraces', { column: columnName }))
return []
}

// Handle recursive curly braces
if (replacementString.columnSplices.length > 0) {
this.issues.push(generateIssue('recursiveCurlyBraces', { column: columnName }))
return []
}
const tagsHavePlaceholder = replacementString.tags.some((tag) => tag.originalTagName === '#')
if (tagsHavePlaceholder) {

// Handle value templates with placeholder
if (replacementString.tags.some((tag) => tag.originalTagName === '#')) {
return this._spliceValueTemplate(columnTemplate)
}

// Default case
return replacementString.parseTree
}

Expand All @@ -142,6 +153,15 @@ export class ColumnSplicer {
_spliceHedColumnTemplate() {
const columnName = 'HED'
const replacementString = this.columnValues.get(columnName)
if (
replacementString === undefined ||
replacementString === null ||
replacementString === 'n/a' ||
replacementString === ''
) {
return null
}

return this._reparseAndSpliceString(replacementString)
}

Expand Down
28 changes: 14 additions & 14 deletions parser/parsedHedTag.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Schema } from '../common/schema/types'
import { getTagLevels, replaceTagNameWithPound } from '../utils/hedStrings'
import ParsedHedSubstring from './parsedHedSubstring'
import { SchemaValueTag } from '../validator/schema/types'
import TagConverter from './converter'
import { TagConverter } from './tagConverter'

/**
* A parsed HED tag.
Expand Down Expand Up @@ -65,7 +65,7 @@ export class ParsedHedTag extends ParsedHedSubstring {
}

/**
* Format this HED tag by removing newlines, double quotes, and slashes.
* Format this HED tag by removing newlines and double quotes.
*
* @returns {string} The formatted version of this tag.
*/
Expand All @@ -78,12 +78,12 @@ export class ParsedHedTag extends ParsedHedSubstring {
if (hedTagString.endsWith('"')) {
hedTagString = hedTagString.slice(0, -1)
}
if (hedTagString.startsWith('/')) {
hedTagString = hedTagString.slice(1)
}
if (hedTagString.endsWith('/')) {
hedTagString = hedTagString.slice(0, -1)
}
// if (hedTagString.startsWith('/')) {
// hedTagString = hedTagString.slice(1)
// }
// if (hedTagString.endsWith('/')) {
// hedTagString = hedTagString.slice(0, -1)
// }
return hedTagString.toLowerCase()
}

Expand Down Expand Up @@ -315,12 +315,12 @@ export class ParsedHed3Tag extends ParsedHedTag {
* @throws {IssueError} If tag conversion or parsing fails.
*/
_convertTag(hedSchemas, hedString, tagSpec) {
const hed3ValidCharacters = /^[^{}[\]()~,\0\t]+$/
if (!hed3ValidCharacters.test(this.originalTag)) {
IssueError.generateAndThrow('internalConsistencyError', {
message: 'The parser failed to properly remove an illegal or special character.',
})
}
// const hed3ValidCharacters = /^[^{}[\]()~,\0\t]+$/
// if (!hed3ValidCharacters.test(this.originalTag)) {
// IssueError.generateAndThrow('internalConsistencyError', {
// message: 'The parser failed to properly remove an illegal or special character.',
// })
// }

const schemaName = tagSpec.library
this.schema = hedSchemas.getSchema(schemaName)
Expand Down
2 changes: 1 addition & 1 deletion parser/converter.js → parser/tagConverter.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { SchemaValueTag } from '../validator/schema/types'
/**
* Converter from a tag specification to a schema-based tag object.
*/
export default class TagConverter {
export class TagConverter {
/**
* A parsed tag token.
* @type {TagSpec}
Expand Down
Loading

0 comments on commit 9561c87

Please sign in to comment.