From 7985eae3e3369767f04b3c10c9be577f021a6a4f Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Sat, 24 Mar 2018 14:25:21 +0100 Subject: [PATCH] fix(swift): lex tag separators properly --- token/swift_lexer.go | 37 +++++++++++++++++++++++++---- token/swift_lexer_test.go | 50 +++++++++++++++++++++++++++++++++++++++ token/token.go | 4 ++-- 3 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 token/swift_lexer_test.go diff --git a/token/swift_lexer.go b/token/swift_lexer.go index d7b83be..312ae29 100644 --- a/token/swift_lexer.go +++ b/token/swift_lexer.go @@ -1,5 +1,9 @@ package token +import ( + "strings" +) + // NewSwiftLexer returns a SwiftLexer ready for parsing the given input string func NewSwiftLexer(name, input string) *SwiftLexer { lexer := NewStringLexer(name, input) @@ -134,8 +138,27 @@ func lexSwiftAlphaNumeric(l *StringLexer) StringLexerStateFn { } func isTagBoundary(s *StringLexer) bool { + oneOf := func(fn ...func() bool) bool { + for _, f := range fn { + if ok := f(); ok { + return true + } + } + return false + } currentPos := s.pos - isTagBoundary := s.accept(string(carriageReturn)) && s.accept(string(lineFeed)) && s.accept(string(dash)+string(tagIdentifier)) + isTagBoundary := s.accept(string(carriageReturn)) && + s.accept(string(lineFeed)) && oneOf( + func() bool { + return strings.HasPrefix(s.input[s.pos:], string(dash)+string(tagIdentifier)) + }, + func() bool { + return strings.HasPrefix(s.input[s.pos:], string(tagIdentifier)) + }, + func() bool { + return s.input[s.pos:] == string(dash) + }, + ) s.pos = currentPos return isTagBoundary } @@ -173,10 +196,14 @@ const ( ) var swiftTokenName = map[Type]string{ - SWIFT_ALPHA: "a", - SWIFT_CHARACTER: "c", - SWIFT_DECIMAL: "d", - SWIFT_NUMERIC: "n", + SWIFT_ALPHA: "a", + SWIFT_CHARACTER: "c", + SWIFT_DECIMAL: "d", + SWIFT_NUMERIC: "n", + SWIFT_ALPHANUMERIC: "an", + SWIFT_DATASET_START: "datasetStart", + SWIFT_TAG_SEPARATOR: "tagSeparator", + SWIFT_MESSAGE_SEPARATOR: "messageSeparator", } func init() { diff --git a/token/swift_lexer_test.go b/token/swift_lexer_test.go new file mode 100644 index 0000000..fc62c79 --- /dev/null +++ b/token/swift_lexer_test.go @@ -0,0 +1,50 @@ +package token + +import ( + "testing" +) + +const githubIssue13TestData = "\r\n" + + ":20:MT940-1803060458\r\n" + + ":21:NONREF\r\n" + + ":25:20040000/12345678EUR\r\n" + + ":28C:0/13\r\n:60M:C170201EUR1234,56\r\n" + + ":61:1702010201DR86,40NMSCNONREF//POS 8888888888\r\n" + + ":86:005?20LASTSCHRIFT/BELAST.?888888888888 8888888884REFERE?22NZ HVV \r\n" + + "A?23BO?24END-TO-END-REF.:?888888888888 8888888884?26CORE / MANDAT\r\n" + + "SREF.:?27VMH008888880001?28GL\xc4UBIGER-ID:?29DE88888888888888888?32H\r\n" + + "AMBURGER HOCHBAHN AG?60Ref. IL888888G8888888/6716\r\n" + + ":61:1702010201DR24,00NMSCNONREF//POS 3409790600\r\n" + + ":86:005?20LASTSCHRIFT/BELAST.?21110865 BEITRAG MITGLIED 888?228888?23\r\n" + + "END-TO-END-REF.:?8888888888888ZV888888Z?25CORE / MANDATSREF.:?26EV\r\n" + + "-000008888?27GL\xc4UBIGER-ID:?88DE88ZZZ8888888888?29Ref. IL888888G2\r\n" + + "145077/1543?32SOME TEST-VEREIN E.V.\r\n" + + ":62M:C170203EUR5378,36\r\n-" + +func Test_SwiftLexer(t *testing.T) { + lexer := NewSwiftLexer("testlexer", githubIssue13TestData) + + var tokens []Token + + for lexer.HasNext() { + tokens = append(tokens, lexer.Next()) + } + + expectedLen := 22 + if len(tokens) != expectedLen { + t.Logf("Expected %d tokens, got %d", expectedLen, len(tokens)) + t.Fail() + } + + var messageSeparatorCount int + for _, tk := range tokens { + if tk.Type() == SWIFT_MESSAGE_SEPARATOR { + messageSeparatorCount++ + } + } + + if messageSeparatorCount != 1 { + t.Logf("Expected one message separator, got %d", messageSeparatorCount) + t.Fail() + } +} diff --git a/token/token.go b/token/token.go index 136afeb..c1951f9 100644 --- a/token/token.go +++ b/token/token.go @@ -156,9 +156,9 @@ func (e elementToken) String() string { return e.val } if len(e.val) > 10 { - return fmt.Sprintf("%.10q...", e.val) + return fmt.Sprintf("%s(%.10q...)", e.typ.String(), e.val) } - return fmt.Sprintf("%q", e.val) + return fmt.Sprintf("%s(%q)", e.typ.String(), e.val) } const (