Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

32-transform-to-and-from-xliff-2 #33

Merged
merged 5 commits into from
Mar 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions pkg/convert/xliff2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package convert

import (
"encoding/xml"
"fmt"

"go.expect.digital/translate/pkg/model"
"golang.org/x/text/language"
)

// XLIFF 2 Specification: https://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html
// XLIFF 2 Example: https://localizely.com/xliff-file/?tab=xliff-20

// xliff2 mirrors the root <xliff> element of an XLIFF 2 document in the
// urn:oasis:names:tc:xliff:document:2.0 namespace.
type xliff2 struct {
XMLName xml.Name `xml:"urn:oasis:names:tc:xliff:document:2.0 xliff"` // namespace and element name of the root
Version string `xml:"version,attr"` // "version" attribute
SrcLang language.Tag `xml:"srcLang,attr"` // "srcLang" attribute
File file `xml:"file"` // the <file> child element
}
// file mirrors a <file> element; only its <unit> children are mapped.
type file struct {
Units []unit `xml:"unit"`
}

// unit mirrors a <unit> element: its "id" attribute, the <note> children of
// <notes>, and the <source> text of <segment>.
//
// Notes is a pointer to a slice so that a unit without notes marshals without
// an empty <notes></notes> wrapper; per the review thread kept below, omitempty
// does not achieve this for "notes>note" (golang/go#7233).
// A nil Notes must be checked before it is dereferenced.
type unit struct {
ID string `xml:"id,attr"`
Notes *[]note `xml:"notes>note"` // Set as pointer to avoid empty <notes></notes> when marshalling.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried many solutions, but this is the only one that works: avoiding <notes></notes> when marshaling xliff2 struct with no Notes.
If there is a better way/ways I am happy to hear about them :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try omitempty

Suggested change
Notes *[]note `xml:"notes>note"` // Set as pointer to avoid empty <notes></notes> when marshalling.
Notes []note `xml:"notes>note,omitempty"`

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does not work, already tried

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<?xml version="1.0" encoding="UTF-8"?>
<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en">
  <file>
    <unit id="Welcome">
      <notes>
        <note category="description">To welcome a new visitor</note>
      </notes>
      <segment>
        <source>Welcome to our website!</source>
      </segment>
    </unit>
    <unit id="Error">
      <notes>
        <note category="description">To inform the user of an error</note>
      </notes>
      <segment>
        <source>Something went wrong. Please try again later.</source>
      </segment>
    </unit>
    <unit id="Feedback">
      <notes></notes>
      <segment>
        <source>We appreciate your feedback. Thank you for using our service.</source>
      </segment>
    </unit>
  </file>
</xliff>

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not possible using field tag golang/go#7233

It is possible by implementing xml.Marshaler.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's good enough.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good enough as it is (*[]note) or []note `xml:"notes>note,omitempty" leaving empty <notes></notes> when there is no description ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*[]note

Source string `xml:"segment>source"`
}

// note mirrors a single <note> element.
type note struct {
Category string `xml:"category,attr"` // "category" attribute, e.g. "description"
Content string `xml:",chardata"` // character data of the element
}

// FromXliff2 converts serialized data from the XML data in the XLIFF 2 format into a model.Messages struct.
//
// The language of the result is the srcLang attribute of the <xliff> root.
// Every <unit> yields one model.Message: ID is the unit id, Message is the
// segment source text, and Description is the content of the unit's first
// note whose category is "description" (empty when the unit has no such note
// or no <notes> element at all).
//
// If data cannot be unmarshalled, an empty model.Messages and an error
// wrapping the xml.Unmarshal error are returned.
func FromXliff2(data []byte) (model.Messages, error) {
	var xlf xliff2
	if err := xml.Unmarshal(data, &xlf); err != nil {
		return model.Messages{}, fmt.Errorf("unmarshal XLIFF 2 formatted XML into xliff2 struct: %w", err)
	}

	messages := model.Messages{Language: xlf.SrcLang, Messages: make([]model.Message, 0, len(xlf.File.Units))}

	// findDescription returns the content of the first "description" note of u,
	// or "" when there is none.
	findDescription := func(u unit) string {
		// Notes is a pointer and is left nil by xml.Unmarshal when the unit has
		// no <notes> element; dereferencing it unguarded would panic.
		if u.Notes == nil {
			return ""
		}

		for _, n := range *u.Notes {
			if n.Category == "description" {
				return n.Content
			}
		}

		return ""
	}

	for _, u := range xlf.File.Units {
		messages.Messages = append(messages.Messages, model.Message{
			ID:          u.ID,
			Message:     u.Source,
			Description: findDescription(u),
		})
	}

	return messages, nil
}

// ToXliff2 converts a model.Messages struct into a byte slice in the XLIFF 2 format.
//
// The result is xml.Header followed by an <xliff> document whose version is
// "2.0" and whose srcLang is messages.Language. Every message becomes one
// <unit> with the message ID as id and the message text as the segment source.
// A message with a non-empty Description additionally gets a single note of
// category "description".
//
// An error wrapping the xml.Marshal error is returned when marshalling fails.
func ToXliff2(messages model.Messages) ([]byte, error) {
xlf := xliff2{
Version: "2.0",
SrcLang: messages.Language,
File: file{
Units: make([]unit, 0, len(messages.Messages)),
},
}

for _, msg := range messages.Messages {
// notes stays nil for a message without a description, so the unit is
// marshalled without a <notes> element.
var notes *[]note
if msg.Description != "" {
notes = &[]note{{Category: "description", Content: msg.Description}}
}

xlf.File.Units = append(xlf.File.Units, unit{
ID: msg.ID,
Source: msg.Message,
Notes: notes,
})
}

data, err := xml.Marshal(&xlf)
if err != nil {
return nil, fmt.Errorf("marshal xliff2 struct to XLIFF 2 formatted XML: %w", err)
}

dataWithHeader := append([]byte(xml.Header), data...) // prepend generic XML header
Copy link
Contributor Author

@VladislavsPerkanuks VladislavsPerkanuks Mar 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding XML Declaration is recommended
https://www.w3.org/TR/xml/#sec-prolog-dtd


return dataWithHeader, nil
}
203 changes: 203 additions & 0 deletions pkg/convert/xliff2_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package convert

import (
"fmt"
"regexp"
"testing"

"github.com/stretchr/testify/assert"
"go.expect.digital/translate/pkg/model"
"golang.org/x/text/language"
)

// assertEqualXml asserts that expected and actual are the same XML text once
// any whitespace standing between a closing '>' and the next opening '<' has
// been removed. It returns the result of the underlying assert.Equal call.
func assertEqualXml(t *testing.T, expected, actual []byte) bool {
	t.Helper()

	// A '>' followed by optional whitespace and a '<': the gap between two tags.
	tagGap := regexp.MustCompile(`>(\s*)<`)

	collapse := func(doc []byte) string {
		return tagGap.ReplaceAllString(string(doc), "><")
	}

	return assert.Equal(t, collapse(expected), collapse(actual))
}

// TestFromXliff2 runs FromXliff2 against table-driven cases: a well-formed
// document ("All OK") and a document whose srcLang is not a known language tag
// ("Malformed language tag"). When wantErr is set only the error text is
// checked; otherwise the language and the messages (in any order) are compared.
func TestFromXliff2(t *testing.T) {
t.Parallel()

tests := []struct {
name string
wantErr error
data []byte
want model.Messages
}{
{
name: "All OK",
data: []byte(`<?xml version="1.0" encoding="UTF-8"?>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And here maybe input?

<xliff version="2.0" xmlns="urn:oasis:names:tc:xliff:document:2.0" srcLang="en" trgLang="fr">
<file id="ngi18n" original="ng.template">
<unit id="common.welcome">
<notes>
<note category="location">src/app/app.component.html:16</note>
</notes>
<segment>
<source>Welcome!</source>
<target>Bienvenue!</target>
</segment>
</unit>
<unit id="common.app.title">
<notes>
<note category="location">src/app/app.component.html:4</note>
<note category="description">App title</note>
</notes>
<segment>
<source>Diary</source>
<target>Agenda</target>
</segment>
</unit>
</file>
</xliff>`),
want: model.Messages{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's better to call it expected?
expected: model.Messages...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you are right, but I guess it should be in a separate PR, as if I change I should change in all files to remain consistency

Language: language.English,
Messages: []model.Message{
{
ID: "common.welcome",
Message: "Welcome!",
},
{
ID: "common.app.title",
Message: "Diary",
Description: "App title",
},
},
},
wantErr: nil,
},
{
name: "Malformed language tag",
data: []byte(`<?xml version="1.0" encoding="UTF-8"?>
<xliff version="2.0" xmlns="urn:oasis:names:tc:xliff:document:2.0" srcLang="xyz-ZY-Latn" trgLang="fr">
<file id="ngi18n" original="ng.template">
<unit id="common.welcome">
<notes>
<note category="location">src/app/app.component.html:16</note>
</notes>
<segment>
<source>Welcome!</source>
<target>Bienvenue!</target>
</segment>
</unit>
<unit id="common.app.title">
<notes>
<note category="location">src/app/app.component.html:4</note>
<note category="description">App title</note>
</notes>
<segment>
<source>Diary</source>
<target>Agenda</target>
</segment>
</unit>
</file>
</xliff>`),
wantErr: fmt.Errorf("language: subtag \"xyz\" is well-formed but unknown"),
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

result, err := FromXliff2(tt.data)
if tt.wantErr != nil {
assert.ErrorContains(t, err, tt.wantErr.Error())
return
}

if !assert.NoError(t, err) {
return
}

assert.Equal(t, tt.want.Language, result.Language)
assert.ElementsMatch(t, tt.want.Messages, result.Messages)
})
}
}

// Test_ToXliff2 checks that ToXliff2 renders a set of messages as the expected
// XLIFF 2 document. Documents are compared with assertEqualXml, so whitespace
// between tags is not significant. A message without a description must
// produce a unit without a <notes> element.
func Test_ToXliff2(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name     string
		messages model.Messages
		want     []byte
		wantErr  error
	}{
		{
			name: "All OK",
			messages: model.Messages{
				Language: language.English,
				Messages: []model.Message{
					{
						ID:          "Welcome",
						Message:     "Welcome to our website!",
						Description: "To welcome a new visitor",
					},
					{
						ID:          "Error",
						Message:     "Something went wrong. Please try again later.",
						Description: "To inform the user of an error",
					},
					{
						ID:      "Feedback",
						Message: "We appreciate your feedback. Thank you for using our service.",
					},
				},
			},
			want: []byte(`<?xml version="1.0" encoding="UTF-8"?>
<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en">
<file>
<unit id="Welcome">
<notes>
<note category="description">To welcome a new visitor</note>
</notes>
<segment>
<source>Welcome to our website!</source>
</segment>
</unit>
<unit id="Error">
<notes>
<note category="description">To inform the user of an error</note>
</notes>
<segment>
<source>Something went wrong. Please try again later.</source>
</segment>
</unit>
<unit id="Feedback">
<segment>
<source>We appreciate your feedback. Thank you for using our service.</source>
</segment>
</unit>
</file>
</xliff>`),
		},
	}

	for _, tc := range cases {
		tc := tc // capture a per-iteration copy for the parallel subtest
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got, err := ToXliff2(tc.messages)

			switch {
			case tc.wantErr != nil:
				// An error is expected: only its text is checked.
				assert.ErrorContains(t, err, tc.wantErr.Error())
			case assert.NoError(t, err):
				// Compare the documents only when marshalling succeeded.
				assertEqualXml(t, tc.want, got)
			}
		})
	}
}