Skip to content

Commit

Permalink
feat(objectionary#801): fix the bug with unicode characters in strings
Browse files Browse the repository at this point in the history
  • Loading branch information
volodya-lombrozo committed Oct 26, 2024
1 parent edf791d commit a9eceed
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/it/phi-unphi/verify.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import java.nio.file.Files
String log = new File(basedir, 'build.log').text;
assert log.contains("BUILD SUCCESS"): assertionMessage("BUILD FAILED")
assert log.contains("sin(42.000000) = -0.916522"): assertionMessage("sin(42.000000) = -0.916522 not found")
assert log.contains("We have the field with the unicaode character 'Φ'"): assertionMessage("We can't find the field with the unicode character 'Φ'")
assert log.contains("We have the field with the unicode character 'Φ'"): assertionMessage("We can't find the field with the unicode character 'Φ'")

private String assertionMessage(String message) {
generateGitHubIssue()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ enum DataType {
* String.
*/
STRING("string", String.class,
value -> Optional.ofNullable(value).map(String::valueOf).map(String::getBytes).orElse(null),
value -> Optional.ofNullable(value).map(String::valueOf)
.map(unicode -> unicode.getBytes(StandardCharsets.UTF_8))
.orElse(null),
bytes -> new String(bytes, StandardCharsets.UTF_8)
),

Expand Down
16 changes: 12 additions & 4 deletions src/main/java/org/eolang/jeo/representation/xmir/XmlValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package org.eolang.jeo.representation.xmir;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -70,10 +71,17 @@ public String string() {
if (hex.isEmpty()) {
result = "";
} else {
result = Arrays.stream(hex.split("(?<=\\G.{2})"))
.map(ch -> (char) Integer.parseInt(ch, XmlValue.RADIX))
.map(String::valueOf)
.collect(Collectors.joining());
final String[] split = hex.split("(?<=\\G.{2})");
byte[] bytes = new byte[split.length];
for (int i = 0; i < split.length; i++) {
bytes[i] = (byte) Integer.parseInt(split[i], XmlValue.RADIX);
}
result = new String(bytes, StandardCharsets.UTF_8);

// result = Arrays.stream(hex.split("(?<=\\G.{2})"))
// .map(ch -> (char) Integer.parseInt(ch, XmlValue.RADIX))
// .map(String::valueOf)
// .collect(Collectors.joining());
}
return result;
} catch (final NumberFormatException exception) {
Expand Down
17 changes: 14 additions & 3 deletions src/test/java/org/eolang/jeo/representation/xmir/XmlValueTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package org.eolang.jeo.representation.xmir;

import java.nio.charset.StandardCharsets;
import java.util.stream.Stream;
import org.eolang.jeo.representation.directives.DirectivesValue;
import org.hamcrest.MatcherAssert;
Expand Down Expand Up @@ -75,19 +76,29 @@ void decodesEncodesCorrectly(final Object origin) {
"Φ", "Ψ", "Ω", "Ϊ", "Ϋ", "ά", "έ", "ή", "ί", "ΰ", "α", "β", "γ", "δ", "ε", "ζ", "η", "θ", "ι", "κ", "λ", "μ",
})
void decodesUnicodeCharacters(final String unicode) throws ImpossibleModificationException {
final String xml = new Xembler(
new DirectivesValue(unicode)
).xml();
System.out.println(xml);
MatcherAssert.assertThat(
"Can't decode unicode characters",
new XmlValue(
new XmlNode(
new Xembler(
new DirectivesValue(unicode)
).xml()
xml
)
).string(),
Matchers.equalTo(unicode)
);
}

/**
 * Checks that the byte pair {@code CE A6} decodes as UTF-8 into the Greek
 * capital letter Phi.
 */
@Test
void test() {
    MatcherAssert.assertThat(
        "Bytes CE A6 must decode as UTF-8 into the Greek capital letter Phi",
        new String(new byte[]{(byte) 0xCE, (byte) 0xA6}, StandardCharsets.UTF_8),
        // Written as an escape so the expectation does not depend on source-file encoding.
        Matchers.equalTo("\u03A6")
    );
}

/**
* Arguments for {@link XmlValueTest#decodesEncodesCorrectly(Object)}.
* @return Stream of arguments.
Expand Down

0 comments on commit a9eceed

Please sign in to comment.