diff --git a/README.md b/README.md index 813117d..3938aab 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ All implementations share the same design philosophy and provide feature parity. - **JSON/Lino Conversion**: Convert between JSON and Links Notation (JavaScript) - **Reference Escaping**: Properly escape strings for Links Notation format (JavaScript) - **Fuzzy Matching**: String similarity utilities for finding matches (JavaScript) +- **Indented Format**: Human-readable indented Links Notation format for display and debugging ## Quick Start @@ -256,6 +257,69 @@ var data = new Dictionary var decoded = Codec.Decode(Codec.Encode(data)); ``` +### Indented Links Notation Format + +The indented format provides a human-readable representation for displaying objects: + +**JavaScript:** +```javascript +import { formatIndented, parseIndented } from 'lino-objects-codec'; + +// Format an object with an identifier +const formatted = formatIndented({ + id: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', + obj: { uuid: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', status: 'executed', command: 'echo test', exitCode: '0' } +}); +console.log(formatted); +// Output: +// 6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 +// uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" +// status "executed" +// command "echo test" +// exitCode "0" + +// Parse it back +const { id, obj } = parseIndented({ text: formatted }); +``` + +**Python:** +```python +from link_notation_objects_codec import format_indented, parse_indented + +# Format an object with an identifier +formatted = format_indented( + '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', + {'uuid': '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', 'status': 'executed'} +) + +# Parse it back +id, obj = parse_indented(formatted) +``` + +**Rust:** +```rust +use lino_objects_codec::format::{format_indented_ordered, parse_indented}; + +// Format an object with an identifier +let pairs = [("status", "executed"), ("exitCode", "0")]; +let formatted = format_indented_ordered("my-uuid", 
&pairs, " ").unwrap(); + +// Parse it back +let (id, obj) = parse_indented(&formatted).unwrap(); +``` + +**C#:** +```csharp +using Lino.Objects.Codec; + +// Format an object with an identifier +var obj = new Dictionary { { "status", "executed" }, { "exitCode", "0" } }; +var formatted = Format.FormatIndented("my-uuid", obj); + +// Parse it back +var (id, parsedObj) = Format.ParseIndented(formatted); +``` + ## How It Works The library uses the [links-notation](https://github.com/link-foundation/links-notation) format as the serialization target. Each object is encoded as a Link with type information: diff --git a/csharp/src/Lino.Objects.Codec/Lino.Objects.Codec.csproj b/csharp/src/Lino.Objects.Codec/Lino.Objects.Codec.csproj index f1dcedb..6439efe 100644 --- a/csharp/src/Lino.Objects.Codec/Lino.Objects.Codec.csproj +++ b/csharp/src/Lino.Objects.Codec/Lino.Objects.Codec.csproj @@ -5,7 +5,7 @@ enable enable Lino.Objects.Codec - 0.1.0 + 0.2.0 Link Foundation A library to encode/decode objects to/from links notation Unlicense diff --git a/csharp/src/Lino.Objects.Codec/ObjectCodec.cs b/csharp/src/Lino.Objects.Codec/ObjectCodec.cs index df72bf5..96e5b79 100644 --- a/csharp/src/Lino.Objects.Codec/ObjectCodec.cs +++ b/csharp/src/Lino.Objects.Codec/ObjectCodec.cs @@ -609,3 +609,308 @@ public static class Codec /// Reconstructed C# object public static object? Decode(string notation) => new ObjectCodec().Decode(notation); } + +/// +/// Formatting utilities for indented Links Notation format. +/// +public static class Format +{ + /// + /// Escape a reference for Links Notation. + /// References need escaping when they contain spaces, quotes, parentheses, colons, or newlines. 
+ /// + /// The value to escape + /// The escaped reference string + public static string EscapeReference(string value) + { + // Check if escaping is needed + bool needsEscaping = value.Any(c => char.IsWhiteSpace(c) || c == '(' || c == ')' || c == '\'' || c == '"' || c == ':') + || value.Contains('\n'); + + if (!needsEscaping) + { + return value; + } + + bool hasSingle = value.Contains('\''); + bool hasDouble = value.Contains('"'); + + // If contains single quotes but not double quotes, use double quotes + if (hasSingle && !hasDouble) + { + return $"\"{value}\""; + } + + // If contains double quotes but not single quotes, use single quotes + if (hasDouble && !hasSingle) + { + return $"'{value}'"; + } + + // If contains both quotes, count which one appears more + if (hasSingle && hasDouble) + { + int singleCount = value.Count(c => c == '\''); + int doubleCount = value.Count(c => c == '"'); + + if (doubleCount < singleCount) + { + // Use double quotes, escape internal double quotes by doubling + var escaped = value.Replace("\"", "\"\""); + return $"\"{escaped}\""; + } + else + { + // Use single quotes, escape internal single quotes by doubling + var escaped = value.Replace("'", "''"); + return $"'{escaped}'"; + } + } + + // Just spaces or other special characters, use single quotes by default + return $"'{value}'"; + } + + /// + /// Unescape a reference from Links Notation format. + /// Reverses the escaping done by EscapeReference. + /// + /// The escaped reference string + /// The unescaped string + public static string UnescapeReference(string str) + { + if (str is null) return str!; + + // Unescape doubled quotes + return str.Replace("\"\"", "\"").Replace("''", "'"); + } + + // Shared parser instance for ParseIndented + private static readonly Parser SharedParser = new(); + + /// + /// Format a value for display in indented Links Notation. 
+ /// Uses quoting strategy compatible with the links-notation parser: + /// - If value contains double quotes, wrap in single quotes + /// - Otherwise, wrap in double quotes + /// + private static string FormatIndentedValue(string? value) + { + if (value is null) + { + return "\"null\""; + } + + bool hasSingle = value.Contains('\''); + bool hasDouble = value.Contains('"'); + + // If contains double quotes but no single quotes, use single quotes + if (hasDouble && !hasSingle) + { + return $"'{value}'"; + } + + // If contains single quotes but no double quotes, use double quotes + if (hasSingle && !hasDouble) + { + return $"\"{value}\""; + } + + // If contains both, use single quotes and escape internal single quotes + if (hasSingle && hasDouble) + { + var escaped = value.Replace("'", "''"); + return $"'{escaped}'"; + } + + // Default: use double quotes + return $"\"{value}\""; + } + + /// + /// Format an object in indented Links Notation format. + /// + /// This format is designed for human readability, displaying objects as: + /// + /// <identifier> + /// <key> "<value>" + /// <key> "<value>" + /// ... 
+ /// + /// + /// The object identifier (displayed on first line) + /// The dictionary with key-value pairs to format + /// The indentation string (default: 2 spaces) + /// Formatted indented Links Notation string + /// If id is null or empty + /// + /// + /// var obj = new Dictionary<string, string> + /// { + /// { "status", "executed" }, + /// { "exitCode", "0" } + /// }; + /// var result = Format.FormatIndented("my-uuid", obj); + /// + /// + public static string FormatIndented(string id, IDictionary obj, string indent = " ") + { + if (string.IsNullOrEmpty(id)) + { + throw new ArgumentException("id is required for FormatIndented", nameof(id)); + } + + if (obj is null) + { + throw new ArgumentNullException(nameof(obj), "obj must be a dictionary for FormatIndented"); + } + + var lines = new List { id }; + + foreach (var kvp in obj) + { + var escapedKey = EscapeReference(kvp.Key); + var formattedValue = FormatIndentedValue(kvp.Value); + lines.Add($"{indent}{escapedKey} {formattedValue}"); + } + + return string.Join("\n", lines); + } + + /// + /// Format an object in indented Links Notation format, maintaining key order. + /// This is similar to FormatIndented but takes an array of tuples to preserve + /// the order of keys. + /// + /// The object identifier (displayed on first line) + /// The key-value pairs in order + /// The indentation string (default: 2 spaces) + /// Formatted indented Links Notation string + public static string FormatIndentedOrdered(string id, (string Key, string? 
Value)[] pairs, string indent = " ") + { + if (string.IsNullOrEmpty(id)) + { + throw new ArgumentException("id is required for FormatIndentedOrdered", nameof(id)); + } + + var lines = new List { id }; + + foreach (var (key, value) in pairs) + { + var escapedKey = EscapeReference(key); + var formattedValue = FormatIndentedValue(value); + lines.Add($"{indent}{escapedKey} {formattedValue}"); + } + + return string.Join("\n", lines); + } + + /// + /// Parse an indented Links Notation string back to an object. + /// + /// This function uses the links-notation parser for proper parsing, + /// supporting the standard Links Notation indented syntax. + /// + /// Parses strings like: + /// + /// <identifier> + /// <key> "<value>" + /// <key> "<value>" + /// ... + /// + /// + /// The format with colon after identifier is also supported (standard lino): + /// + /// <identifier>: + /// <key> "<value>" + /// + /// + /// The indented Links Notation string to parse + /// A tuple of (id, dictionary of key-value pairs) + /// If text is null or empty + /// + /// + /// var text = "my-uuid\n status \"executed\"\n exitCode \"0\""; + /// var (id, obj) = Format.ParseIndented(text); + /// // id = "my-uuid" + /// // obj["status"] = "executed" + /// + /// + public static (string Id, Dictionary Obj) ParseIndented(string text) + { + if (string.IsNullOrEmpty(text)) + { + throw new ArgumentException("text is required for ParseIndented", nameof(text)); + } + + var lines = text.Split('\n'); + if (lines.Length == 0) + { + throw new ArgumentException("text must have at least one line (the identifier)", nameof(text)); + } + + // Filter out empty lines to preserve indentation structure for the parser + // Empty lines would break the indentation context in links-notation + var nonEmptyLines = lines.Where(l => !string.IsNullOrWhiteSpace(l)).ToArray(); + + if (nonEmptyLines.Length == 0) + { + throw new ArgumentException("text must have at least one non-empty line (the identifier)", nameof(text)); + } + + 
// Convert to standard lino format by adding colon after first line if not present + // This allows the links-notation parser to properly parse the indented structure + var firstLine = nonEmptyLines[0].Trim(); + string linoText; + if (firstLine.EndsWith(':')) + { + linoText = string.Join("\n", nonEmptyLines); + } + else + { + linoText = $"{firstLine}:\n{string.Join("\n", nonEmptyLines.Skip(1))}"; + } + + // Use links-notation parser + var parsed = SharedParser.Parse(linoText); + + if (parsed is null || parsed.Count == 0) + { + throw new ArgumentException("Failed to parse indented Links Notation", nameof(text)); + } + + // Extract id and key-value pairs from parsed result + var mainLink = parsed[0]; + var resultId = mainLink.Id ?? ""; + var obj = new Dictionary(); + + // Process the values list - each entry is a doublet (key value) + if (mainLink.Values is not null) + { + foreach (var child in mainLink.Values) + { + if (child.Values is not null && child.Values.Count == 2) + { + var keyRef = child.Values[0]; + var valueRef = child.Values[1]; + + // Get key string + var key = keyRef.Id ?? 
""; + + // Get value string, handling null + var valueStr = valueRef.Id; + if (valueStr == "null") + { + obj[key] = null; + } + else + { + obj[key] = valueStr; + } + } + } + } + + return (resultId, obj); + } +} diff --git a/csharp/tests/Lino.Objects.Codec.Tests/FormatTests.cs b/csharp/tests/Lino.Objects.Codec.Tests/FormatTests.cs new file mode 100644 index 0000000..a399f61 --- /dev/null +++ b/csharp/tests/Lino.Objects.Codec.Tests/FormatTests.cs @@ -0,0 +1,176 @@ +// Tests for Format class (FormatIndented, ParseIndented) + +using Xunit; +using Lino.Objects.Codec; + +namespace Lino.Objects.Codec.Tests; + +public class FormatTests +{ + [Fact] + public void EscapeReference_SimpleString() + { + Assert.Equal("hello", Format.EscapeReference("hello")); + Assert.Equal("world", Format.EscapeReference("world")); + } + + [Fact] + public void EscapeReference_StringWithSpaces() + { + var result = Format.EscapeReference("hello world"); + Assert.True(result.StartsWith("'") || result.StartsWith("\"")); + Assert.Contains("hello world", result); + } + + [Fact] + public void EscapeReference_StringWithSingleQuotes() + { + var result = Format.EscapeReference("it's"); + Assert.Equal("\"it's\"", result); + } + + [Fact] + public void EscapeReference_StringWithDoubleQuotes() + { + var result = Format.EscapeReference("he said \"hello\""); + Assert.Equal("'he said \"hello\"'", result); + } + + [Fact] + public void UnescapeReference_DoubledQuotes() + { + Assert.Equal("he said \"hello\"", Format.UnescapeReference("he said \"\"hello\"\"")); + Assert.Equal("it's", Format.UnescapeReference("it''s")); + } + + [Fact] + public void FormatIndentedOrdered_Basic() + { + var pairs = new (string Key, string? 
Value)[] + { + ("uuid", "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"), + ("status", "executed"), + ("command", "echo test"), + ("exitCode", "0") + }; + var result = Format.FormatIndentedOrdered("6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", pairs); + var lines = result.Split('\n'); + Assert.Equal("6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", lines[0]); + Assert.Equal(" uuid \"6dcf4c1b-ff3f-482c-95ab-711ea7d1b019\"", lines[1]); + Assert.Equal(" status \"executed\"", lines[2]); + Assert.Equal(" command \"echo test\"", lines[3]); + Assert.Equal(" exitCode \"0\"", lines[4]); + } + + [Fact] + public void FormatIndentedOrdered_CustomIndentation() + { + var pairs = new (string Key, string? Value)[] { ("key", "value") }; + var result = Format.FormatIndentedOrdered("test-id", pairs, " "); + var lines = result.Split('\n'); + Assert.Equal("test-id", lines[0]); + Assert.Equal(" key \"value\"", lines[1]); + } + + [Fact] + public void FormatIndentedOrdered_ValueWithQuotes() + { + // Values containing double quotes are wrapped in single quotes (links-notation style) + var pairs = new (string Key, string? 
Value)[] { ("message", "He said \"hello\"") }; + var result = Format.FormatIndentedOrdered("test-id", pairs); + var lines = result.Split('\n'); + Assert.Equal(" message 'He said \"hello\"'", lines[1]); + } + + [Fact] + public void FormatIndented_RequiresId() + { + var obj = new Dictionary { { "key", "value" } }; + Assert.Throws(() => Format.FormatIndented("", obj)); + } + + [Fact] + public void ParseIndented_Basic() + { + var text = "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019\n uuid \"6dcf4c1b-ff3f-482c-95ab-711ea7d1b019\"\n status \"executed\"\n exitCode \"0\""; + var (id, obj) = Format.ParseIndented(text); + Assert.Equal("6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", id); + Assert.Equal("6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", obj["uuid"]); + Assert.Equal("executed", obj["status"]); + Assert.Equal("0", obj["exitCode"]); + } + + [Fact] + public void ParseIndented_WithQuotes() + { + // Links-notation style: use single quotes to wrap value containing double quotes + var text = "test-id\n message 'He said \"hello\"'"; + var (id, obj) = Format.ParseIndented(text); + Assert.Equal("test-id", id); + Assert.Equal("He said \"hello\"", obj["message"]); + } + + [Fact] + public void ParseIndented_EmptyLinesSkipped() + { + var text = "test-id\n\n key \"value\"\n\n another \"value2\""; + var (id, obj) = Format.ParseIndented(text); + Assert.Equal("test-id", id); + Assert.Equal("value", obj["key"]); + Assert.Equal("value2", obj["another"]); + } + + [Fact] + public void ParseIndented_RequiresText() + { + Assert.Throws(() => Format.ParseIndented("")); + } + + [Fact] + public void RoundtripFormatIndented_Basic() + { + var pairs = new (string Key, string? 
Value)[] + { + ("uuid", "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"), + ("status", "executed"), + ("command", "echo test"), + ("exitCode", "0") + }; + var formatted = Format.FormatIndentedOrdered("6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", pairs); + var (parsedId, parsedObj) = Format.ParseIndented(formatted); + + Assert.Equal("6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", parsedId); + foreach (var (key, value) in pairs) + { + Assert.Equal(value, parsedObj[key]); + } + } + + [Fact] + public void RoundtripFormatIndented_WithQuotes() + { + var pairs = new (string Key, string? Value)[] { ("message", "He said \"hello\"") }; + var formatted = Format.FormatIndentedOrdered("test-id", pairs); + var (parsedId, parsedObj) = Format.ParseIndented(formatted); + + Assert.Equal("test-id", parsedId); + Assert.Equal("He said \"hello\"", parsedObj["message"]); + } + + [Fact] + public void FormatIndented_WithNullValue() + { + var obj = new Dictionary { { "key", null } }; + var result = Format.FormatIndented("test-id", obj); + var lines = result.Split('\n'); + Assert.Equal(" key \"null\"", lines[1]); + } + + [Fact] + public void ParseIndented_NullValue() + { + var text = "test-id\n key \"null\""; + var (_, obj) = Format.ParseIndented(text); + Assert.Null(obj["key"]); + } +} diff --git a/experiments/test_escaped_quotes.js b/experiments/test_escaped_quotes.js new file mode 100644 index 0000000..53d6102 --- /dev/null +++ b/experiments/test_escaped_quotes.js @@ -0,0 +1,34 @@ +// Test parsing escaped quotes +import { Parser, Link, LinksGroup, formatLinks } from '../js/node_modules/links-notation/dist/index.js'; + +const parser = new Parser(); + +// Test escaped quotes in lino format +const escapedFormat = `test-id: + message "He said ""hello"""`; + +console.log("=== Testing escaped quotes ==="); +console.log("Input:"); +console.log(escapedFormat); + +try { + const result = parser.parse(escapedFormat); + console.log("\nParsed result:", JSON.stringify(result, null, 2)); +} catch (e) { + console.log("Parse 
error:", e.message); +} + +// Test with single quotes +const singleQuoteFormat = `test-id: + message 'He said "hello"'`; + +console.log("\n=== Testing single quotes wrapping double quotes ==="); +console.log("Input:"); +console.log(singleQuoteFormat); + +try { + const result = parser.parse(singleQuoteFormat); + console.log("\nParsed result:", JSON.stringify(result, null, 2)); +} catch (e) { + console.log("Parse error:", e.message); +} diff --git a/experiments/test_lino_format.js b/experiments/test_lino_format.js new file mode 100644 index 0000000..babf76a --- /dev/null +++ b/experiments/test_lino_format.js @@ -0,0 +1,69 @@ +// Test what links-notation can parse and format +import { Parser, Link, LinksGroup, formatLinks } from '../js/node_modules/links-notation/dist/index.js'; + +const parser = new Parser(); + +// Test the requested indented format from issue #17 +const indentedFormat = `6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 + uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + status "executed" + command "echo test" + exitCode "0"`; + +console.log("=== Testing parsing of indented format ==="); +console.log("Input:"); +console.log(indentedFormat); +console.log("\n--- Parser output ---"); +try { + const result = parser.parse(indentedFormat); + console.log("Parsed result:", JSON.stringify(result, null, 2)); +} catch (e) { + console.log("Parse error:", e.message); +} + +// Test similar format with colon (standard lino indented syntax) +const linoIndented = `myId: + uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + status "executed"`; + +console.log("\n=== Testing parsing of standard lino indented format ==="); +console.log("Input:"); +console.log(linoIndented); +console.log("\n--- Parser output ---"); +try { + const result = parser.parse(linoIndented); + console.log("Parsed result:", JSON.stringify(result, null, 2)); +} catch (e) { + console.log("Parse error:", e.message); +} + +// Test simple doublet format +const doublets = `uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" +status 
"executed" +command "echo test" +exitCode "0"`; + +console.log("\n=== Testing parsing of doublets ==="); +console.log("Input:"); +console.log(doublets); +console.log("\n--- Parser output ---"); +try { + const result = parser.parse(doublets); + console.log("Parsed result:", JSON.stringify(result, null, 2)); +} catch (e) { + console.log("Parse error:", e.message); +} + +// Test formatting with Link +console.log("\n=== Testing Link formatting ==="); +try { + const link = new Link('myId', [ + new Link('uuid', [new Link('"6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"')]), + new Link('status', [new Link('"executed"')]), + ]); + console.log("Link toString:", link.toString()); + console.log("Link format(true):", link.format(true)); + console.log("Link format(false):", link.format(false)); +} catch (e) { + console.log("Format error:", e.message); +} diff --git a/experiments/test_using_lino_for_indented.js b/experiments/test_using_lino_for_indented.js new file mode 100644 index 0000000..f12623e --- /dev/null +++ b/experiments/test_using_lino_for_indented.js @@ -0,0 +1,68 @@ +// Test using links-notation parser for indented format +import { Parser, Link, LinksGroup, formatLinks } from '../js/node_modules/links-notation/dist/index.js'; + +const parser = new Parser(); + +// The issue format (without colon after identifier) +const issueFormat = `6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 + uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + status "executed" + command "echo test" + exitCode "0"`; + +// Convert to standard lino format by adding colon after first line +function convertToLinoFormat(text) { + const lines = text.split('\n'); + if (lines.length === 0) return text; + // Add colon to first line if not present + if (!lines[0].trim().endsWith(':')) { + lines[0] = lines[0].trim() + ':'; + } + return lines.join('\n'); +} + +console.log("=== Original format from issue ==="); +console.log(issueFormat); + +const linoFormat = convertToLinoFormat(issueFormat); +console.log("\n=== Converted to 
standard lino format ==="); +console.log(linoFormat); + +console.log("\n=== Parsing converted format ==="); +try { + const result = parser.parse(linoFormat); + console.log("Parsed result:", JSON.stringify(result, null, 2)); + + // Extract id and obj from parsed result + if (result.length > 0) { + const mainLink = result[0]; + const id = mainLink.id; + const obj = {}; + + for (const child of mainLink.values || []) { + if (child.values && child.values.length === 2) { + const key = child.values[0].id; + const value = child.values[1].id; + obj[key] = value; + } + } + + console.log("\n=== Extracted data ==="); + console.log("ID:", id); + console.log("Object:", JSON.stringify(obj, null, 2)); + } +} catch (e) { + console.log("Parse error:", e.message); +} + +// Now test formatting back +console.log("\n=== Formatting with links-notation ==="); +const link = new Link('6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', [ + new Link(null, [new Link('uuid'), new Link('6dcf4c1b-ff3f-482c-95ab-711ea7d1b019')]), + new Link(null, [new Link('status'), new Link('executed')]), + new Link(null, [new Link('command'), new Link('echo test')]), + new Link(null, [new Link('exitCode'), new Link('0')]), +]); + +console.log("Format as standard lino:", link.format(true)); +console.log("Format as lino with parens:", link.format(false)); diff --git a/js/.changeset/add-indented-format.md b/js/.changeset/add-indented-format.md new file mode 100644 index 0000000..8c42916 --- /dev/null +++ b/js/.changeset/add-indented-format.md @@ -0,0 +1,24 @@ +--- +'lino-objects-codec': minor +--- + +Add indented Links Notation format support for human-readable object display. + +New functions: + +- `formatIndented({ id, obj, indent })` - Format an object with identifier in indented style +- `parseIndented({ text })` - Parse indented format back to { id, obj } + +The indented format displays objects as: + +``` +<identifier> + <key> "<value>" + <key> "<value>" + ...
+``` + +Also adds: + +- `escapeReference()` for escaping values with special characters +- `unescapeReference()` for reversing escape sequences diff --git a/js/package-lock.json b/js/package-lock.json index 3193537..402b418 100644 --- a/js/package-lock.json +++ b/js/package-lock.json @@ -1,12 +1,12 @@ { "name": "lino-objects-codec", - "version": "0.1.1", + "version": "0.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lino-objects-codec", - "version": "0.1.1", + "version": "0.2.0", "license": "Unlicense", "dependencies": { "links-notation": "^0.11.0" diff --git a/js/package.json b/js/package.json index c592393..54e37eb 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "lino-objects-codec", - "version": "0.1.1", + "version": "0.2.0", "description": "A library to encode/decode objects to/from links notation", "type": "module", "main": "./src/index.js", diff --git a/js/src/format.js b/js/src/format.js index 280a5f6..a42479a 100644 --- a/js/src/format.js +++ b/js/src/format.js @@ -332,3 +332,182 @@ export function formatAsLino(options = {}) { const formattedValues = values.map((value) => ` ${value}`).join('\n'); return `(\n${formattedValues}\n)`; } + +/** + * Format a value for display in indented Links Notation. 
+ * Uses quoting strategy compatible with the links-notation parser: + * - If value contains double quotes, wrap in single quotes + * - Otherwise, wrap in double quotes + * + * @private + * @param {*} value - The value to format + * @returns {string} Formatted value with appropriate quotes + */ +function formatIndentedValue(value) { + if (value === null || value === undefined) { + return '"null"'; + } + + const str = String(value); + + // If contains double quotes but no single quotes, use single quotes + if (str.includes('"') && !str.includes("'")) { + return `'${str}'`; + } + + // If contains single quotes but no double quotes, use double quotes + if (str.includes("'") && !str.includes('"')) { + return `"${str}"`; + } + + // If contains both, use single quotes and escape internal single quotes + if (str.includes("'") && str.includes('"')) { + const escaped = str.replace(/'/g, "''"); + return `'${escaped}'`; + } + + // Default: use double quotes + return `"${str}"`; +} + +/** + * Format an object in indented Links Notation format. + * + * This format is designed for human readability, displaying objects as: + * ``` + * + * "" + * "" + * ... 
+ * ``` + * + * Example: + * formatIndented({ + * id: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', + * obj: { uuid: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', status: 'executed', command: 'echo test', exitCode: '0' } + * }) + * + * Returns: + * 6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 + * uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + * status "executed" + * command "echo test" + * exitCode "0" + * + * @param {Object} options - Options + * @param {string} options.id - The object identifier (displayed on first line) + * @param {Object} options.obj - The object with key-value pairs to format + * @param {string} [options.indent=' '] - The indentation string (default: 2 spaces) + * @returns {string} Formatted indented Links Notation string + */ +export function formatIndented(options = {}) { + const { id, obj, indent = ' ' } = options; + + if (!id) { + throw new Error('id is required for formatIndented'); + } + + if (!obj || typeof obj !== 'object' || Array.isArray(obj)) { + throw new Error('obj must be a plain object for formatIndented'); + } + + const lines = [id]; + + for (const [key, value] of Object.entries(obj)) { + const escapedKey = escapeReference({ value: key }); + const formattedValue = formatIndentedValue(value); + lines.push(`${indent}${escapedKey} ${formattedValue}`); + } + + return lines.join('\n'); +} + +/** + * Parse an indented Links Notation string back to an object. + * + * This function uses the links-notation parser for proper parsing, + * supporting the standard Links Notation indented syntax. + * + * Parses strings like: + * ``` + * + * "" + * "" + * ... 
+ * ``` + * + * The format with colon after identifier is also supported (standard lino): + * ``` + * : + * "" + * ``` + * + * @param {Object} options - Options + * @param {string} options.text - The indented Links Notation string to parse + * @returns {{ id: string, obj: Object }} Object with id and parsed key-value pairs + */ +export function parseIndented(options = {}) { + const { text } = options; + + if (!text || typeof text !== 'string') { + throw new Error('text is required for parseIndented'); + } + + const lines = text.split('\n'); + if (lines.length === 0) { + throw new Error('text must have at least one line (the identifier)'); + } + + // Filter out empty lines to preserve indentation structure for the parser + // Empty lines would break the indentation context in links-notation + const nonEmptyLines = lines.filter((line) => line.trim()); + + if (nonEmptyLines.length === 0) { + throw new Error( + 'text must have at least one non-empty line (the identifier)' + ); + } + + // Convert to standard lino format by adding colon after first line if not present + // This allows the links-notation parser to properly parse the indented structure + const firstLine = nonEmptyLines[0].trim(); + let linoText; + if (!firstLine.endsWith(':')) { + linoText = `${firstLine}:\n${nonEmptyLines.slice(1).join('\n')}`; + } else { + linoText = nonEmptyLines.join('\n'); + } + + // Use links-notation parser + const parsed = parser.parse(linoText); + + if (!parsed || parsed.length === 0) { + throw new Error('Failed to parse indented Links Notation'); + } + + // Extract id and key-value pairs from parsed result + const mainLink = parsed[0]; + const id = mainLink.id || ''; + const obj = {}; + + // Process the values array - each entry is a doublet (key value) + for (const child of mainLink.values || []) { + if (child.values && child.values.length === 2) { + const keyRef = child.values[0]; + const valueRef = child.values[1]; + + // Get key string + const key = keyRef.id || ''; + + // 
Get value string, handling null + const valueStr = valueRef.id; + if (valueStr === 'null') { + obj[key] = null; + } else { + obj[key] = valueStr; + } + } + } + + return { id, obj }; +} diff --git a/js/src/index.js b/js/src/index.js index 365dccb..b6716ab 100644 --- a/js/src/index.js +++ b/js/src/index.js @@ -24,6 +24,8 @@ export { jsonToLino, linoToJson, formatAsLino, + formatIndented, + parseIndented, } from './format.js'; // Fuzzy matching utilities diff --git a/js/tests/test_format.test.js b/js/tests/test_format.test.js index cd61e24..f7610d1 100644 --- a/js/tests/test_format.test.js +++ b/js/tests/test_format.test.js @@ -10,6 +10,8 @@ import { jsonToLino, linoToJson, formatAsLino, + formatIndented, + parseIndented, } from '../src/index.js'; // Tests for escapeReference @@ -251,3 +253,156 @@ test('formatAsLino - array of values', () => { assert.ok(result.startsWith('(')); assert.ok(result.endsWith(')')); }); + +// Tests for formatIndented +test('formatIndented - basic object', () => { + const result = formatIndented({ + id: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', + obj: { + uuid: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', + status: 'executed', + command: 'echo test', + exitCode: '0', + }, + }); + const lines = result.split('\n'); + assert.equal(lines[0], '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019'); + assert.equal(lines[1], ' uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"'); + assert.equal(lines[2], ' status "executed"'); + assert.equal(lines[3], ' command "echo test"'); + assert.equal(lines[4], ' exitCode "0"'); +}); + +test('formatIndented - custom indentation', () => { + const result = formatIndented({ + id: 'test-id', + obj: { key: 'value' }, + indent: ' ', // 4 spaces + }); + const lines = result.split('\n'); + assert.equal(lines[0], 'test-id'); + assert.equal(lines[1], ' key "value"'); +}); + +test('formatIndented - value with double quotes', () => { + // Values containing double quotes are wrapped in single quotes (links-notation style) + const result = 
formatIndented({ + id: 'test-id', + obj: { message: 'He said "hello"' }, + }); + const lines = result.split('\n'); + assert.equal(lines[0], 'test-id'); + assert.equal(lines[1], ` message 'He said "hello"'`); +}); + +test('formatIndented - key with space', () => { + const result = formatIndented({ + id: 'test-id', + obj: { 'key with space': 'value' }, + }); + const lines = result.split('\n'); + assert.equal(lines[0], 'test-id'); + assert.ok( + lines[1].includes("'key with space'") || + lines[1].includes('"key with space"') + ); +}); + +test('formatIndented - null value', () => { + const result = formatIndented({ + id: 'test-id', + obj: { key: null }, + }); + const lines = result.split('\n'); + assert.equal(lines[0], 'test-id'); + assert.equal(lines[1], ' key "null"'); +}); + +test('formatIndented - requires id', () => { + assert.throws(() => formatIndented({ obj: { key: 'value' } }), { + message: 'id is required for formatIndented', + }); +}); + +test('formatIndented - requires plain object', () => { + assert.throws(() => formatIndented({ id: 'test', obj: [1, 2, 3] }), { + message: 'obj must be a plain object for formatIndented', + }); +}); + +// Tests for parseIndented +test('parseIndented - basic object', () => { + const text = `6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 + uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + status "executed" + command "echo test" + exitCode "0"`; + + const result = parseIndented({ text }); + assert.equal(result.id, '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019'); + assert.equal(result.obj.uuid, '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019'); + assert.equal(result.obj.status, 'executed'); + assert.equal(result.obj.command, 'echo test'); + assert.equal(result.obj.exitCode, '0'); +}); + +test('parseIndented - value with quotes', () => { + // Links-notation style: use single quotes to wrap value containing double quotes + const text = `test-id + message 'He said "hello"'`; + + const result = parseIndented({ text }); + assert.equal(result.id, 'test-id'); + 
assert.equal(result.obj.message, 'He said "hello"'); +}); + +test('parseIndented - empty lines are skipped', () => { + const text = `test-id + + key "value" + + another "value2"`; + + const result = parseIndented({ text }); + assert.equal(result.id, 'test-id'); + assert.equal(result.obj.key, 'value'); + assert.equal(result.obj.another, 'value2'); +}); + +test('parseIndented - requires text', () => { + assert.throws(() => parseIndented({}), { + message: 'text is required for parseIndented', + }); +}); + +// Roundtrip tests for formatIndented/parseIndented +test('formatIndented/parseIndented roundtrip - basic', () => { + const original = { + id: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', + obj: { + uuid: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', + status: 'executed', + command: 'echo test', + exitCode: '0', + }, + }; + + const formatted = formatIndented(original); + const parsed = parseIndented({ text: formatted }); + + assert.equal(parsed.id, original.id); + assert.deepEqual(parsed.obj, original.obj); +}); + +test('formatIndented/parseIndented roundtrip - with quotes', () => { + const original = { + id: 'test-id', + obj: { message: 'He said "hello"' }, + }; + + const formatted = formatIndented(original); + const parsed = parseIndented({ text: formatted }); + + assert.equal(parsed.id, original.id); + assert.deepEqual(parsed.obj, original.obj); +}); diff --git a/python/examples/basic_usage.py b/python/examples/basic_usage.py index 47f19b2..3dd8495 100644 --- a/python/examples/basic_usage.py +++ b/python/examples/basic_usage.py @@ -64,7 +64,7 @@ def main(): # Self-referencing list lst = [1, 2, 3] lst.append(lst) - print(f" Created self-referencing list") + print(" Created self-referencing list") encoded_circular = encode(lst) print(f" Encoded: {encoded_circular}") decoded_circular = decode(encoded_circular) @@ -75,7 +75,7 @@ def main(): # Self-referencing dict d = {"name": "root"} d["self"] = d - print(f"\n Created self-referencing dict") + print("\n Created 
self-referencing dict") encoded_dict_circular = encode(d) print(f" Encoded: {encoded_dict_circular}") decoded_dict_circular = decode(encoded_dict_circular) @@ -89,7 +89,7 @@ def main(): print("\n5. Shared Object References:") shared = {"shared": "data", "value": 42} container = {"first": shared, "second": shared, "third": shared} - print(f" Created container with 3 references to same object") + print(" Created container with 3 references to same object") encoded_shared = encode(container) print(f" Encoded: {encoded_shared}") decoded_shared = decode(encoded_shared) diff --git a/python/experiments/debug_decode.py b/python/experiments/debug_decode.py index e23dc37..0e48b21 100644 --- a/python/experiments/debug_decode.py +++ b/python/experiments/debug_decode.py @@ -22,7 +22,9 @@ print(f" values: {val.values}") if val.values: for j, subval in enumerate(val.values): - print(f" Subvalue {j}: id={subval.id}, has_values={bool(subval.values)}") + print( + f" Subvalue {j}: id={subval.id}, has_values={bool(subval.values)}" + ) if subval.values: for k, subsubval in enumerate(subval.values): print(f" Subsubvalue {k}: id={subsubval.id}") diff --git a/python/experiments/debug_decoder.py b/python/experiments/debug_decoder.py index 2980ce9..51032be 100644 --- a/python/experiments/debug_decoder.py +++ b/python/experiments/debug_decoder.py @@ -3,12 +3,13 @@ import sys import os -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) from link_notation_objects_codec.codec import ObjectCodec from links_notation import Parser -notation = '(obj_0: dict ((str bmFtZQ==) (str ZDE=)) ((str b3RoZXI=) (obj_1: dict ((str bmFtZQ==) (str ZDI=)) ((str b3RoZXI=) obj_0))))' +notation = "(obj_0: dict ((str bmFtZQ==) (str ZDE=)) ((str b3RoZXI=) (obj_1: dict ((str bmFtZQ==) (str ZDI=)) ((str b3RoZXI=) obj_0))))" print(f"Input: {notation}") parser = Parser() @@ -17,13 +18,17 @@ if parsed: link = parsed[0] - 
print(f"\nLink details:") + print("\nLink details:") print(f" link.id: {link.id}") - print(f" link.id.startswith('obj_'): {link.id.startswith('obj_') if link.id else False}") + print( + f" link.id.startswith('obj_'): {link.id.startswith('obj_') if link.id else False}" + ) print(f" link.values: {link.values}") if link.values: print(f" link.values[0]: {link.values[0]}") - print(f" link.values[0].id: {link.values[0].id if hasattr(link.values[0], 'id') else 'N/A'}") + print( + f" link.values[0].id: {link.values[0].id if hasattr(link.values[0], 'id') else 'N/A'}" + ) codec = ObjectCodec() result = codec._decode_link(link) diff --git a/python/experiments/test_codec_simple.py b/python/experiments/test_codec_simple.py index df5ff5c..dec9ca7 100644 --- a/python/experiments/test_codec_simple.py +++ b/python/experiments/test_codec_simple.py @@ -13,6 +13,7 @@ except Exception as e: print(f"Error: {e}") import traceback + traceback.print_exc() print("\n=== Testing bool ===") @@ -21,10 +22,11 @@ print(f"Encoded: {encoded}") decoded = decode(encoded) print(f"Decoded: {decoded}") - print(f"Match: {decoded == True}") + print(f"Match: {decoded is True}") except Exception as e: print(f"Error: {e}") import traceback + traceback.print_exc() print("\n=== Testing int ===") @@ -37,6 +39,7 @@ except Exception as e: print(f"Error: {e}") import traceback + traceback.print_exc() print("\n=== Testing str ===") @@ -49,6 +52,7 @@ except Exception as e: print(f"Error: {e}") import traceback + traceback.print_exc() print("\n=== Testing list ===") @@ -61,4 +65,5 @@ except Exception as e: print(f"Error: {e}") import traceback + traceback.print_exc() diff --git a/python/experiments/test_correct_format.py b/python/experiments/test_correct_format.py index bbf6bd6..8124fcd 100644 --- a/python/experiments/test_correct_format.py +++ b/python/experiments/test_correct_format.py @@ -12,21 +12,24 @@ print("=" * 60) # Build the structure manually -str_key = Link(values=[Link(link_id='str'), 
Link(link_id='c2VsZg==')]) # (str c2VsZg==) -obj_0_ref = Link(link_id='obj_0') # Reference to obj_0 +str_key = Link(values=[Link(link_id="str"), Link(link_id="c2VsZg==")]) # (str c2VsZg==) +obj_0_ref = Link(link_id="obj_0") # Reference to obj_0 key_value_pair = Link(values=[str_key, obj_0_ref]) # ((str c2VsZg==) obj_0) # The dict itself with self-reference using (self-ref: contents) syntax # Format: (obj_0: dict obj_0 ((str c2VsZg==) obj_0)) -dict_link = Link(link_id='obj_0', values=[ - Link(link_id='dict'), # Type marker - Link(link_id='obj_0'), # Reference to self (same as the outer obj_0) - key_value_pair # The key-value pair -]) +dict_link = Link( + link_id="obj_0", + values=[ + Link(link_id="dict"), # Type marker + Link(link_id="obj_0"), # Reference to self (same as the outer obj_0) + key_value_pair, # The key-value pair + ], +) encoded = dict_link.format() print(f"Encoded: {encoded}") -print(f"Expected: (obj_0: dict obj_0 ((str c2VsZg==) obj_0))") +print("Expected: (obj_0: dict obj_0 ((str c2VsZg==) obj_0))") print(f"Match: {encoded == '(obj_0: dict obj_0 ((str c2VsZg==) obj_0))'}") # Test parsing back @@ -49,17 +52,21 @@ # Simple dict without self-reference: {"a": 1} # Expected format: (dict ((str YQ==) (int 1))) -simple_dict = Link(values=[ - Link(link_id='dict'), - Link(values=[ - Link(values=[Link(link_id='str'), Link(link_id='YQ==')]), - Link(values=[Link(link_id='int'), Link(link_id='1')]) - ]) -]) +simple_dict = Link( + values=[ + Link(link_id="dict"), + Link( + values=[ + Link(values=[Link(link_id="str"), Link(link_id="YQ==")]), + Link(values=[Link(link_id="int"), Link(link_id="1")]), + ] + ), + ] +) encoded2 = simple_dict.format() print(f"Encoded: {encoded2}") -print(f"Expected: (dict ((str YQ==) (int 1)))") +print("Expected: (dict ((str YQ==) (int 1)))") print("\n" + "=" * 60) print("Test 3: Self-referencing list") @@ -67,16 +74,19 @@ # Self-referencing list: lst = [1, 2, lst] # Expected format: (obj_0: list (int 1) (int 2) obj_0) -list_link = 
Link(link_id='obj_0', values=[ - Link(link_id='list'), - Link(values=[Link(link_id='int'), Link(link_id='1')]), - Link(values=[Link(link_id='int'), Link(link_id='2')]), - Link(link_id='obj_0') # Reference to self -]) +list_link = Link( + link_id="obj_0", + values=[ + Link(link_id="list"), + Link(values=[Link(link_id="int"), Link(link_id="1")]), + Link(values=[Link(link_id="int"), Link(link_id="2")]), + Link(link_id="obj_0"), # Reference to self + ], +) encoded3 = list_link.format() print(f"Encoded: {encoded3}") -print(f"Expected: (obj_0: list (int 1) (int 2) obj_0)") +print("Expected: (obj_0: list (int 1) (int 2) obj_0)") print("\n" + "=" * 60) print("Test 4: Mutual references") @@ -87,23 +97,29 @@ # Expected: # (obj_0: list (int 1) (int 2) (obj_1: list (int 3) (int 4) obj_0)) -list2_ref = Link(link_id='obj_1', values=[ - Link(link_id='list'), - Link(values=[Link(link_id='int'), Link(link_id='3')]), - Link(values=[Link(link_id='int'), Link(link_id='4')]), - Link(link_id='obj_0') # Reference to list1 -]) - -list1_link = Link(link_id='obj_0', values=[ - Link(link_id='list'), - Link(values=[Link(link_id='int'), Link(link_id='1')]), - Link(values=[Link(link_id='int'), Link(link_id='2')]), - list2_ref # Nested list2 definition -]) +list2_ref = Link( + link_id="obj_1", + values=[ + Link(link_id="list"), + Link(values=[Link(link_id="int"), Link(link_id="3")]), + Link(values=[Link(link_id="int"), Link(link_id="4")]), + Link(link_id="obj_0"), # Reference to list1 + ], +) + +list1_link = Link( + link_id="obj_0", + values=[ + Link(link_id="list"), + Link(values=[Link(link_id="int"), Link(link_id="1")]), + Link(values=[Link(link_id="int"), Link(link_id="2")]), + list2_ref, # Nested list2 definition + ], +) encoded4 = list1_link.format() print(f"Encoded: {encoded4}") -print(f"Expected: (obj_0: list (int 1) (int 2) (obj_1: list (int 3) (int 4) obj_0))") +print("Expected: (obj_0: list (int 1) (int 2) (obj_1: list (int 3) (int 4) obj_0))") print("\n" + "=" * 60) print("Summary: 
All tests show correct format using (self-ref: ...) syntax") diff --git a/python/experiments/test_decode_issue.py b/python/experiments/test_decode_issue.py index f510645..7a684f0 100644 --- a/python/experiments/test_decode_issue.py +++ b/python/experiments/test_decode_issue.py @@ -2,7 +2,8 @@ """Debug decoder issue.""" import sys -sys.path.insert(0, 'src') + +sys.path.insert(0, "src") from links_notation import Parser @@ -14,19 +15,23 @@ links = parser.parse(encoded) print(f"Number of links: {len(links)}") + def print_link(link, indent=0): prefix = " " * indent print(f"{prefix}Link:") print(f"{prefix} id: {link.id if hasattr(link, 'id') else 'N/A'}") - print(f"{prefix} values: {len(link.values) if hasattr(link, 'values') and link.values else 0}") - if hasattr(link, 'values') and link.values: + print( + f"{prefix} values: {len(link.values) if hasattr(link, 'values') and link.values else 0}" + ) + if hasattr(link, "values") and link.values: for i, val in enumerate(link.values): print(f"{prefix} value[{i}]:") - if hasattr(val, 'id') or hasattr(val, 'values'): + if hasattr(val, "id") or hasattr(val, "values"): print_link(val, indent + 2) else: print(f"{prefix} {val}") + for i, link in enumerate(links): print(f"\n--- Link {i} ---") print_link(link) diff --git a/python/experiments/test_escape.py b/python/experiments/test_escape.py index 3e3a4d3..5bb6302 100644 --- a/python/experiments/test_escape.py +++ b/python/experiments/test_escape.py @@ -10,7 +10,7 @@ print(f"Original: {repr(test_string)}") # Encode to base64 -b64 = base64.b64encode(test_string.encode('utf-8')).decode('ascii') +b64 = base64.b64encode(test_string.encode("utf-8")).decode("ascii") print(f"Base64: {b64}") # Create link with base64 @@ -23,6 +23,6 @@ if parsed and parsed[0].values: recovered_b64 = parsed[0].values[1].id print(f"Recovered base64: {recovered_b64}") - decoded = base64.b64decode(recovered_b64).decode('utf-8') + decoded = base64.b64decode(recovered_b64).decode("utf-8") print(f"Decoded: 
{repr(decoded)}") print(f"Match: {decoded == test_string}") diff --git a/python/experiments/test_failing_cases.py b/python/experiments/test_failing_cases.py index 66cf609..5089129 100644 --- a/python/experiments/test_failing_cases.py +++ b/python/experiments/test_failing_cases.py @@ -2,7 +2,8 @@ """Test the failing cases to understand the issue.""" import sys -sys.path.insert(0, 'src') + +sys.path.insert(0, "src") from link_notation_objects_codec import encode, decode @@ -19,7 +20,7 @@ print(f" Decoded: {decoded}") print(f" Has 'name': {'name' in decoded}") print(f" Has 'other': {'other' in decoded}") -if 'other' in decoded: +if "other" in decoded: print(f" decoded['other']: {decoded['other']}") print() diff --git a/python/experiments/test_format.py b/python/experiments/test_format.py index 7becadd..eab9074 100644 --- a/python/experiments/test_format.py +++ b/python/experiments/test_format.py @@ -2,7 +2,8 @@ """Quick test to verify the encoder produces the correct format.""" import sys -sys.path.insert(0, 'src') + +sys.path.insert(0, "src") from link_notation_objects_codec import encode, decode @@ -12,7 +13,7 @@ lst.append(lst) encoded = encode(lst) print(f" Encoded: {encoded}") -print(f" Expected: (obj_0: list obj_0)") +print(" Expected: (obj_0: list obj_0)") print(f" Match: {encoded == '(obj_0: list obj_0)'}") print() @@ -22,7 +23,7 @@ d["self"] = d encoded = encode(d) print(f" Encoded: {encoded}") -print(f" Expected: (obj_0: dict ((str c2VsZg==) obj_0))") +print(" Expected: (obj_0: dict ((str c2VsZg==) obj_0))") print(f" Match: {encoded == '(obj_0: dict ((str c2VsZg==) obj_0))'}") print() @@ -34,8 +35,10 @@ list2.append(list1) encoded = encode(list1) print(f" Encoded: {encoded}") -print(f" Expected: (obj_0: list (int 1) (int 2) (obj_1: list (int 3) (int 4) obj_0))") -print(f" Match: {encoded == '(obj_0: list (int 1) (int 2) (obj_1: list (int 3) (int 4) obj_0))'}") +print(" Expected: (obj_0: list (int 1) (int 2) (obj_1: list (int 3) (int 4) obj_0))") +print( + f" 
Match: {encoded == '(obj_0: list (int 1) (int 2) (obj_1: list (int 3) (int 4) obj_0))'}" +) print() # Test 4: Round-trip diff --git a/python/experiments/test_implementation.py b/python/experiments/test_implementation.py index 554aef1..24225c6 100644 --- a/python/experiments/test_implementation.py +++ b/python/experiments/test_implementation.py @@ -12,7 +12,7 @@ try: decoded = decode(encoded) - print(f"Decoded successfully") + print("Decoded successfully") print(f"Has 'self' key: {'self' in decoded}") print(f"Has 'other' key: {'other' in decoded}") print(f"Self-reference works: {decoded['self'] is decoded}") @@ -20,6 +20,7 @@ except Exception as e: print(f"ERROR: {e}") import traceback + traceback.print_exc() print() @@ -32,12 +33,13 @@ try: decoded2 = decode(encoded2) - print(f"Decoded successfully") + print("Decoded successfully") print(f"List length: {len(decoded2)}") print(f"Self-reference works: {decoded2[0] is decoded2}") except Exception as e: print(f"ERROR: {e}") import traceback + traceback.print_exc() print() @@ -52,11 +54,12 @@ try: decoded3 = decode(encoded3) - print(f"Decoded successfully") + print("Decoded successfully") print(f"List1 length: {len(decoded3)}") print(f"List1[2] length: {len(decoded3[2])}") print(f"Mutual reference works: {decoded3[2][2] is decoded3}") except Exception as e: print(f"ERROR: {e}") import traceback + traceback.print_exc() diff --git a/python/experiments/test_implementation2.py b/python/experiments/test_implementation2.py index 29a759d..a058f58 100644 --- a/python/experiments/test_implementation2.py +++ b/python/experiments/test_implementation2.py @@ -11,7 +11,7 @@ print(f"Encoded: {encoded}") decoded = decode(encoded) -print(f"Decoded successfully") +print("Decoded successfully") print(f"Has 'self' key: {'self' in decoded}") print(f"Has 'other' key: {'other' in decoded}") print(f"Self-reference works: {decoded['self'] is decoded}") @@ -26,7 +26,7 @@ print(f"Encoded: {encoded2}") decoded2 = decode(encoded2) -print(f"Decoded 
successfully") +print("Decoded successfully") print(f"List length: {len(decoded2)}") print(f"Self-reference works: {decoded2[0] is decoded2}") print() @@ -41,7 +41,7 @@ print(f"Encoded: {encoded3}") decoded3 = decode(encoded3) -print(f"Decoded successfully") +print("Decoded successfully") print(f"Type: {type(decoded3)}") print(f"List1 length: {len(decoded3)}") print(f"List1[0]: {decoded3[0]}") diff --git a/python/experiments/test_links_api.py b/python/experiments/test_links_api.py index 2c47b10..341b917 100644 --- a/python/experiments/test_links_api.py +++ b/python/experiments/test_links_api.py @@ -25,9 +25,9 @@ print(f" Link values: {result2[0].values}") if result2[0].values: print(f" First value type: {type(result2[0].values[0])}") - if hasattr(result2[0].values[0], 'id'): + if hasattr(result2[0].values[0], "id"): print(f" First value id: {result2[0].values[0].id}") - if hasattr(result2[0].values[0], 'values'): + if hasattr(result2[0].values[0], "values"): print(f" First value values: {result2[0].values[0].values}") print(f" Formatted: {format_links(result2)}") @@ -68,8 +68,8 @@ print(f" Number of values: {len(result4[0].values)}") for i, val in enumerate(result4[0].values): print(f" Value {i}: {val} (type: {type(val).__name__})") - if hasattr(val, 'id'): + if hasattr(val, "id"): print(f" id: {val.id}") - if hasattr(val, 'values'): + if hasattr(val, "values"): print(f" values: {val.values}") print(f" Formatted: {format_links(result4)}") diff --git a/python/experiments/test_mutual_dicts.py b/python/experiments/test_mutual_dicts.py index eaa6559..947ccdf 100644 --- a/python/experiments/test_mutual_dicts.py +++ b/python/experiments/test_mutual_dicts.py @@ -1,6 +1,7 @@ """Test mutual reference dicts.""" from link_notation_objects_codec import encode +from links_notation import Parser dict1 = {"name": "dict1"} dict2 = {"name": "dict2"} @@ -11,7 +12,7 @@ print(f"Encoded: {encoded}") # Parse it to see the structure -from links_notation import Parser + parser = Parser() 
links = parser.parse(encoded) @@ -25,6 +26,6 @@ print(f" ID: '{val.id}'") print(f" Values: {len(val.values) if hasattr(val, 'values') else 0}") - if hasattr(val, 'values') and val.values: + if hasattr(val, "values") and val.values: for j, subval in enumerate(val.values): print(f" Subvalue {j}: ID='{subval.id}'") diff --git a/python/experiments/test_new_impl.py b/python/experiments/test_new_impl.py index 7c563c2..34340d1 100644 --- a/python/experiments/test_new_impl.py +++ b/python/experiments/test_new_impl.py @@ -3,7 +3,8 @@ import sys import os -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) from link_notation_objects_codec import encode, decode @@ -16,7 +17,7 @@ encoded = encode(obj) print(f"Encoded: {encoded}") -print(f"Expected format: (obj_0: dict ((str c2VsZg==) obj_0))") +print("Expected format: (obj_0: dict ((str c2VsZg==) obj_0))") # Decode it back decoded = decode(encoded) @@ -33,7 +34,7 @@ encoded2 = encode(lst) print(f"Encoded: {encoded2}") -print(f"Expected format: (obj_0: list (int 1) (int 2) (int 3) obj_0)") +print("Expected format: (obj_0: list (int 1) (int 2) (int 3) obj_0)") decoded2 = decode(encoded2) print(f"Decoded successfully: {decoded2 is not None}") @@ -65,7 +66,7 @@ simple = {"a": 1, "b": 2} encoded4 = encode(simple) print(f"Encoded: {encoded4}") -print(f"Expected format: (dict ((str ...) (int 1)) ((str ...) (int 2)))") +print("Expected format: (dict ((str ...) (int 1)) ((str ...) 
(int 2)))") decoded4 = decode(encoded4) print(f"Decoded: {decoded4}") diff --git a/python/experiments/test_new_reference_style.py b/python/experiments/test_new_reference_style.py index 7b3482a..4df8851 100644 --- a/python/experiments/test_new_reference_style.py +++ b/python/experiments/test_new_reference_style.py @@ -13,7 +13,7 @@ - The container should have link_id set to establish the self-reference """ -from links_notation import Link, Parser, format_links +from links_notation import Link, Parser # Test 1: Simple self-reference using new style print("=== Test 1: Simple self-reference ===") @@ -24,11 +24,13 @@ obj_link = Link( link_id="obj_0", values=[ - Link(values=[ - Link(link_id="self"), - Link(link_id="obj_0") # Direct reference, not (ref obj_0) - ]) - ] + Link( + values=[ + Link(link_id="self"), + Link(link_id="obj_0"), # Direct reference, not (ref obj_0) + ] + ) + ], ) encoded = obj_link.format() print(f"Encoded: {encoded}") @@ -47,16 +49,16 @@ link_id="obj_1", values=[ Link(values=[Link(link_id="1"), Link(link_id="1")]), - Link(values=[Link(link_id="2"), Link(link_id="2")]) - ] + Link(values=[Link(link_id="2"), Link(link_id="2")]), + ], ) outer_obj = Link( link_id="obj_0", values=[ Link(values=[Link(link_id="self"), Link(link_id="obj_0")]), - Link(values=[Link(link_id="other"), inner_obj]) - ] + Link(values=[Link(link_id="other"), inner_obj]), + ], ) encoded2 = outer_obj.format() @@ -76,7 +78,7 @@ link_id="obj_0", values=[ Link(link_id="obj_0") # Direct self-reference - ] + ], ) encoded3 = list_link.format() diff --git a/python/experiments/test_new_reference_style2.py b/python/experiments/test_new_reference_style2.py index 67ae5c7..46accf6 100644 --- a/python/experiments/test_new_reference_style2.py +++ b/python/experiments/test_new_reference_style2.py @@ -23,8 +23,8 @@ # (self obj_0) Link(values=[Link(link_id="self"), Link(link_id="obj_0")]), # (num 42) - Link(values=[Link(link_id="num"), Link(link_id="42")]) - ] + Link(values=[Link(link_id="num"), 
Link(link_id="42")]), + ], ) encoded = obj_link.format() diff --git a/python/experiments/test_roundtrip.py b/python/experiments/test_roundtrip.py index db18684..b6937fb 100644 --- a/python/experiments/test_roundtrip.py +++ b/python/experiments/test_roundtrip.py @@ -33,4 +33,5 @@ except Exception as e: print(f"ERROR: {e}") import traceback + traceback.print_exc() diff --git a/python/experiments/test_roundtrip_format.py b/python/experiments/test_roundtrip_format.py index 74db156..36d4ef2 100644 --- a/python/experiments/test_roundtrip_format.py +++ b/python/experiments/test_roundtrip_format.py @@ -4,27 +4,25 @@ from links_notation import Link, Parser import base64 + def create_test_structure(): """Create: obj = {"self": obj}""" # Expected output: (obj_0: dict ((str c2VsZg==) obj_0)) # OR: (obj_0: dict obj_0 ((str c2VsZg==) obj_0)) - self_key_b64 = base64.b64encode(b'self').decode('ascii') + self_key_b64 = base64.b64encode(b"self").decode("ascii") print(f"'self' encoded: {self_key_b64}") # Format 1: WITHOUT redundant obj_0 - print("\n" + "="*60) + print("\n" + "=" * 60) print("Format 1: (obj_0: dict ((str c2VsZg==) obj_0))") - print("="*60) + print("=" * 60) - str_key = Link(values=[Link(link_id='str'), Link(link_id=self_key_b64)]) - obj_ref = Link(link_id='obj_0') + str_key = Link(values=[Link(link_id="str"), Link(link_id=self_key_b64)]) + obj_ref = Link(link_id="obj_0") pair = Link(values=[str_key, obj_ref]) - dict_link1 = Link(link_id='obj_0', values=[ - Link(link_id='dict'), - pair - ]) + dict_link1 = Link(link_id="obj_0", values=[Link(link_id="dict"), pair]) encoded1 = dict_link1.format() print(f"Encoded: {encoded1}") @@ -35,15 +33,18 @@ def create_test_structure(): print(f"Parsed: {parsed1[0] if parsed1 else None}") # Format 2: WITH redundant obj_0 (as user showed) - print("\n" + "="*60) + print("\n" + "=" * 60) print("Format 2: (obj_0: dict obj_0 ((str c2VsZg==) obj_0))") - print("="*60) + print("=" * 60) - dict_link2 = Link(link_id='obj_0', values=[ - 
Link(link_id='dict'), - Link(link_id='obj_0'), # Reference to self - pair - ]) + dict_link2 = Link( + link_id="obj_0", + values=[ + Link(link_id="dict"), + Link(link_id="obj_0"), # Reference to self + pair, + ], + ) encoded2 = dict_link2.format() print(f"Encoded: {encoded2}") @@ -53,11 +54,11 @@ def create_test_structure(): print(f"Parsed: {parsed2[0] if parsed2 else None}") # Format 3: NO dict marker, just pairs (from original issue) - print("\n" + "="*60) + print("\n" + "=" * 60) print("Format 3: (obj_0: ((str c2VsZg==) obj_0)) - no dict marker") - print("="*60) + print("=" * 60) - dict_link3 = Link(link_id='obj_0', values=[pair]) + dict_link3 = Link(link_id="obj_0", values=[pair]) encoded3 = dict_link3.format() print(f"Encoded: {encoded3}") @@ -66,5 +67,6 @@ def create_test_structure(): parsed3 = parser.parse(encoded3) print(f"Parsed: {parsed3[0] if parsed3 else None}") -if __name__ == '__main__': + +if __name__ == "__main__": create_test_structure() diff --git a/python/experiments/test_shared_object.py b/python/experiments/test_shared_object.py index b83750a..13a1413 100644 --- a/python/experiments/test_shared_object.py +++ b/python/experiments/test_shared_object.py @@ -21,4 +21,6 @@ print(f"First item type: {type(decoded[0])}") print(f"First item: {decoded[0]}") if len(decoded) > 1: - print(f"All same object: {decoded[0] is decoded[1] is decoded[2] if len(decoded) > 2 else 'N/A'}") + print( + f"All same object: {decoded[0] is decoded[1] is decoded[2] if len(decoded) > 2 else 'N/A'}" + ) diff --git a/python/experiments/test_string_encoding.py b/python/experiments/test_string_encoding.py index 64e09c8..3a6b6f8 100644 --- a/python/experiments/test_string_encoding.py +++ b/python/experiments/test_string_encoding.py @@ -31,6 +31,7 @@ except Exception as e: print(f"Error: {e}") import traceback + traceback.print_exc() # Test how to properly escape strings in links notation diff --git a/python/pyproject.toml b/python/pyproject.toml index 86e6f12..1741015 100644 --- 
a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "lino-objects-codec" -version = "0.1.0" +version = "0.2.0" description = "A library to encode/decode objects to/from links notation" readme = "README.md" requires-python = ">=3.13" diff --git a/python/src/link_notation_objects_codec/__init__.py b/python/src/link_notation_objects_codec/__init__.py index 0b3982b..7206d2f 100644 --- a/python/src/link_notation_objects_codec/__init__.py +++ b/python/src/link_notation_objects_codec/__init__.py @@ -6,6 +6,20 @@ """ from .codec import ObjectCodec, decode, encode +from .format import ( + escape_reference, + format_indented, + parse_indented, + unescape_reference, +) -__version__ = "0.1.0" -__all__ = ["ObjectCodec", "encode", "decode"] +__version__ = "0.2.0" +__all__ = [ + "ObjectCodec", + "encode", + "decode", + "escape_reference", + "unescape_reference", + "format_indented", + "parse_indented", +] diff --git a/python/src/link_notation_objects_codec/format.py b/python/src/link_notation_objects_codec/format.py new file mode 100644 index 0000000..e39268a --- /dev/null +++ b/python/src/link_notation_objects_codec/format.py @@ -0,0 +1,248 @@ +""" +Formatting utilities for Links Notation. + +These utilities provide functions for formatting and parsing indented Links Notation format. +Uses the links-notation library for parsing to ensure compatibility with the standard format. +""" + +import re +from typing import Any, Dict, Optional, Tuple + +from links_notation import Parser + +# Shared parser instance +_parser = Parser() + + +def escape_reference(value: Any) -> str: + """ + Escape a reference for Links Notation. + + References need escaping when they contain spaces, quotes, parentheses, colons, or newlines. 
+ + Args: + value: The value to escape + + Returns: + The escaped reference string + """ + # Numbers and booleans don't need escaping + if isinstance(value, (int, float, bool)): + return str(value) + + s = str(value) + + # Check if escaping is needed + needs_escaping = bool(re.search(r'[\s()\'":]', s)) or "\n" in s + + if not needs_escaping: + return s + + # If contains single quotes but not double quotes, use double quotes + if "'" in s and '"' not in s: + return f'"{s}"' + + # If contains double quotes but not single quotes, use single quotes + if '"' in s and "'" not in s: + return f"'{s}'" + + # If contains both quotes, count which one appears more + if "'" in s and '"' in s: + single_count = s.count("'") + double_count = s.count('"') + + if double_count < single_count: + # Use double quotes, escape internal double quotes by doubling + return f'"{s.replace(chr(34), chr(34) + chr(34))}"' + else: + # Use single quotes, escape internal single quotes by doubling + return f"'{s.replace(chr(39), chr(39) + chr(39))}'" + + # Just spaces or other special characters, use single quotes by default + return f"'{s}'" + + +def unescape_reference(s: Optional[str]) -> Optional[str]: + """ + Unescape a reference from Links Notation format. + + Reverses the escaping done by escape_reference. + + Args: + s: The escaped reference string + + Returns: + The unescaped string + """ + if s is None: + return s + + # Unescape doubled quotes + unescaped = s.replace('""', '"') + unescaped = unescaped.replace("''", "'") + + return unescaped + + +def _format_indented_value(value: Any) -> str: + """ + Format a value for display in indented Links Notation. 
+ Uses quoting strategy compatible with the links-notation parser: + - If value contains double quotes, wrap in single quotes + - Otherwise, wrap in double quotes + + Args: + value: The value to format + + Returns: + Formatted value with appropriate quotes + """ + if value is None: + return '"null"' + + s = str(value) + + # If contains double quotes but no single quotes, use single quotes + if '"' in s and "'" not in s: + return f"'{s}'" + + # If contains single quotes but no double quotes, use double quotes + if "'" in s and '"' not in s: + return f'"{s}"' + + # If contains both, use single quotes and escape internal single quotes + if "'" in s and '"' in s: + escaped = s.replace("'", "''") + return f"'{escaped}'" + + # Default: use double quotes + return f'"{s}"' + + +def format_indented( + id: str, + obj: Dict[str, Any], + indent: str = " ", +) -> str: + """ + Format an object in indented Links Notation format. + + This format is designed for human readability, displaying objects as: + + <identifier> + <key> "<value>" + <key> "<value>" + ... + + Example: + >>> format_indented( + ... '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', + ... {'uuid': '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', 'status': 'executed'} + ...
) + '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019\\n uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"\\n status "executed"' + + Args: + id: The object identifier (displayed on first line) + obj: The object (dict) with key-value pairs to format + indent: The indentation string (default: 2 spaces) + + Returns: + Formatted indented Links Notation string + + Raises: + ValueError: If id is empty or obj is not a dict + """ + if not id: + raise ValueError("id is required for format_indented") + + if not isinstance(obj, dict): + raise ValueError("obj must be a dict for format_indented") + + lines = [id] + + for key, value in obj.items(): + escaped_key = escape_reference(key) + formatted_value = _format_indented_value(value) + lines.append(f"{indent}{escaped_key} {formatted_value}") + + return "\n".join(lines) + + +def parse_indented(text: str) -> Tuple[str, Dict[str, Any]]: + """ + Parse an indented Links Notation string back to an object. + + This function uses the links-notation parser for proper parsing, + supporting the standard Links Notation indented syntax. + + Parses strings like: + + <identifier> + <key> "<value>" + <key> "<value>" + ...
+ + The format with colon after identifier is also supported (standard lino): + + <identifier>: + <key> "<value>" + + Args: + text: The indented Links Notation string to parse + + Returns: + A tuple of (id, obj) where id is the identifier and obj is the parsed dict + + Raises: + ValueError: If text is empty or invalid + """ + if not text: + raise ValueError("text is required for parse_indented") + + lines = text.split("\n") + if len(lines) == 0: + raise ValueError("text must have at least one line (the identifier)") + + # Filter out empty lines to preserve indentation structure for the parser + # Empty lines would break the indentation context in links-notation + non_empty_lines = [line for line in lines if line.strip()] + + if len(non_empty_lines) == 0: + raise ValueError("text must have at least one non-empty line (the identifier)") + + # Convert to standard lino format by adding colon after first line if not present + # This allows the links-notation parser to properly parse the indented structure + first_line = non_empty_lines[0].strip() + if not first_line.endswith(":"): + lino_text = first_line + ":\n" + "\n".join(non_empty_lines[1:]) + else: + lino_text = "\n".join(non_empty_lines) + + # Use links-notation parser + parsed = _parser.parse(lino_text) + + if not parsed or len(parsed) == 0: + raise ValueError("Failed to parse indented Links Notation") + + # Extract id and key-value pairs from parsed result + main_link = parsed[0] + result_id = main_link.id or "" + obj: Dict[str, Any] = {} + + # Process the values array - each entry is a doublet (key value) + for child in main_link.values or []: + if hasattr(child, "values") and child.values and len(child.values) == 2: + key_ref = child.values[0] + value_ref = child.values[1] + + # Get key string + key = key_ref.id or "" + + # Get value string, handling null + value_str = value_ref.id + if value_str == "null": + obj[key] = None + else: + obj[key] = value_str + + return result_id, obj diff --git a/python/test_encoder_fix.py
b/python/test_encoder_fix.py index 673d847..048d88a 100644 --- a/python/test_encoder_fix.py +++ b/python/test_encoder_fix.py @@ -2,7 +2,8 @@ """Test the updated encoder implementation.""" import sys -sys.path.insert(0, 'src') + +sys.path.insert(0, "src") from link_notation_objects_codec import encode, decode @@ -30,7 +31,7 @@ decoded = decode(encoded) print(f" Decoded has 'name': {'name' in decoded}") print(f" Decoded has 'other': {'other' in decoded}") -if 'other' in decoded and 'other' in decoded['other']: +if "other" in decoded and "other" in decoded["other"]: print(f" Circular ref works: {decoded['other']['other'] is decoded}") print() @@ -61,7 +62,7 @@ print(f" Lines: {len(encoded.split(chr(10)))}") decoded = decode(encoded) print(f" Decoded has 'children': {'children' in decoded}") -if 'children' in decoded and len(decoded['children']) > 0: +if "children" in decoded and len(decoded["children"]) > 0: print(f" Children count: {len(decoded['children'])}") - if 'parent' in decoded['children'][0]: + if "parent" in decoded["children"][0]: print(f" Circular ref works: {decoded['children'][0]['parent'] is decoded}") diff --git a/python/tests/test_format.py b/python/tests/test_format.py new file mode 100644 index 0000000..7e56f1f --- /dev/null +++ b/python/tests/test_format.py @@ -0,0 +1,188 @@ +"""Tests for formatting utilities (format_indented, parse_indented).""" + +import pytest + +from link_notation_objects_codec import ( + escape_reference, + format_indented, + parse_indented, + unescape_reference, +) + + +class TestEscapeReference: + """Tests for escape_reference function.""" + + def test_simple_string(self): + assert escape_reference("hello") == "hello" + assert escape_reference("world") == "world" + + def test_numbers(self): + assert escape_reference(42) == "42" + assert escape_reference(3.14) == "3.14" + assert escape_reference(-17) == "-17" + + def test_booleans(self): + assert escape_reference(True) == "True" + assert escape_reference(False) == "False" + + 
def test_string_with_spaces(self): + result = escape_reference("hello world") + assert result.startswith("'") or result.startswith('"') + assert "hello world" in result + + def test_string_with_single_quotes(self): + result = escape_reference("it's") + assert result.startswith('"') + assert result == '"it\'s"' + + def test_string_with_double_quotes(self): + result = escape_reference('he said "hello"') + assert result.startswith("'") + assert result == "'he said \"hello\"'" + + def test_string_with_both_quotes(self): + result = escape_reference('"it\'s" he said') + assert result.startswith("'") or result.startswith('"') + + +class TestUnescapeReference: + """Tests for unescape_reference function.""" + + def test_simple_string(self): + assert unescape_reference("hello") == "hello" + + def test_doubled_double_quotes(self): + assert unescape_reference('he said ""hello""') == 'he said "hello"' + + def test_doubled_single_quotes(self): + assert unescape_reference("it''s") == "it's" + + def test_none(self): + assert unescape_reference(None) is None + + +class TestFormatIndented: + """Tests for format_indented function.""" + + def test_basic_object(self): + result = format_indented( + "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", + { + "uuid": "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", + "status": "executed", + "command": "echo test", + "exitCode": "0", + }, + ) + lines = result.split("\n") + assert lines[0] == "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + assert lines[1] == ' uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"' + assert lines[2] == ' status "executed"' + assert lines[3] == ' command "echo test"' + assert lines[4] == ' exitCode "0"' + + def test_custom_indentation(self): + result = format_indented("test-id", {"key": "value"}, indent=" ") + lines = result.split("\n") + assert lines[0] == "test-id" + assert lines[1] == ' key "value"' + + def test_value_with_double_quotes(self): + # Values containing double quotes are wrapped in single quotes (links-notation style) + result = 
format_indented("test-id", {"message": 'He said "hello"'}) + lines = result.split("\n") + assert lines[0] == "test-id" + assert lines[1] == " message 'He said \"hello\"'" + + def test_key_with_space(self): + result = format_indented("test-id", {"key with space": "value"}) + lines = result.split("\n") + assert lines[0] == "test-id" + assert "'key with space'" in lines[1] or '"key with space"' in lines[1] + + def test_null_value(self): + result = format_indented("test-id", {"key": None}) + lines = result.split("\n") + assert lines[0] == "test-id" + assert lines[1] == ' key "null"' + + def test_requires_id(self): + with pytest.raises(ValueError, match="id is required"): + format_indented("", {"key": "value"}) + + def test_requires_dict(self): + with pytest.raises(ValueError, match="obj must be a dict"): + format_indented("test", [1, 2, 3]) # type: ignore + + +class TestParseIndented: + """Tests for parse_indented function.""" + + def test_basic_object(self): + text = """6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 + uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + status "executed" + command "echo test" + exitCode "0\"""" + + id, obj = parse_indented(text) + assert id == "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + assert obj["uuid"] == "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + assert obj["status"] == "executed" + assert obj["command"] == "echo test" + assert obj["exitCode"] == "0" + + def test_value_with_quotes(self): + # Links-notation style: use single quotes to wrap value containing double quotes + text = """test-id + message 'He said "hello"'""" + + id, obj = parse_indented(text) + assert id == "test-id" + assert obj["message"] == 'He said "hello"' + + def test_empty_lines_are_skipped(self): + text = """test-id + + key "value" + + another "value2\"""" + + id, obj = parse_indented(text) + assert id == "test-id" + assert obj["key"] == "value" + assert obj["another"] == "value2" + + def test_requires_text(self): + with pytest.raises(ValueError, match="text is required"): + 
parse_indented("") + + +class TestRoundtrip: + """Roundtrip tests for format_indented/parse_indented.""" + + def test_basic_roundtrip(self): + original_id = "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" + original_obj = { + "uuid": "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", + "status": "executed", + "command": "echo test", + "exitCode": "0", + } + + formatted = format_indented(original_id, original_obj) + parsed_id, parsed_obj = parse_indented(formatted) + + assert parsed_id == original_id + assert parsed_obj == original_obj + + def test_roundtrip_with_quotes(self): + original_id = "test-id" + original_obj = {"message": 'He said "hello"'} + + formatted = format_indented(original_id, original_obj) + parsed_id, parsed_obj = parse_indented(formatted) + + assert parsed_id == original_id + assert parsed_obj == original_obj diff --git a/rust/Cargo.lock b/rust/Cargo.lock index c71bd93..0eb90d2 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -19,7 +19,7 @@ dependencies = [ [[package]] name = "lino-objects-codec" -version = "0.1.0" +version = "0.2.0" dependencies = [ "base64", "links-notation", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 224e237..ca38ec6 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lino-objects-codec" -version = "0.1.0" +version = "0.2.0" edition = "2021" rust-version = "1.70" description = "A library to encode/decode objects to/from links notation" diff --git a/rust/src/lib.rs b/rust/src/lib.rs index d8a9da1..00f8b2b 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -896,6 +896,342 @@ pub fn decode(notation: &str) -> Result { DEFAULT_CODEC.with(|codec| codec.borrow_mut().decode(notation)) } +/// Formatting utilities for indented Links Notation format. 
+pub mod format { + use super::{parse_lino_to_links, LiNo}; + use std::collections::HashMap; + + /// Error types for format operations + #[derive(Debug, Clone, PartialEq, Eq)] + pub enum FormatError { + /// Missing required field + MissingField(String), + /// Invalid input + InvalidInput(String), + } + + impl std::fmt::Display for FormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FormatError::MissingField(field) => write!(f, "Missing required field: {}", field), + FormatError::InvalidInput(msg) => write!(f, "Invalid input: {}", msg), + } + } + } + + impl std::error::Error for FormatError {} + + /// Escape a reference for Links Notation. + /// + /// References need escaping when they contain spaces, quotes, parentheses, colons, or newlines. + /// + /// # Arguments + /// + /// * `value` - The value to escape + /// + /// # Returns + /// + /// The escaped reference string + pub fn escape_reference(value: &str) -> String { + // Check if escaping is needed + let needs_escaping = value.chars().any(|c| { + c.is_whitespace() || c == '(' || c == ')' || c == '\'' || c == '"' || c == ':' + }) || value.contains('\n'); + + if !needs_escaping { + return value.to_string(); + } + + let has_single = value.contains('\''); + let has_double = value.contains('"'); + + // If contains single quotes but not double quotes, use double quotes + if has_single && !has_double { + return format!("\"{}\"", value); + } + + // If contains double quotes but not single quotes, use single quotes + if has_double && !has_single { + return format!("'{}'", value); + } + + // If contains both quotes, count which one appears more + if has_single && has_double { + let single_count = value.chars().filter(|&c| c == '\'').count(); + let double_count = value.chars().filter(|&c| c == '"').count(); + + if double_count < single_count { + // Use double quotes, escape internal double quotes by doubling + let escaped = value.replace('"', "\"\""); + return 
format!("\"{}\"", escaped); + } + // Use single quotes, escape internal single quotes by doubling + let escaped = value.replace('\'', "''"); + return format!("'{}'", escaped); + } + + // Just spaces or other special characters, use single quotes by default + format!("'{}'", value) + } + + /// Unescape a reference from Links Notation format. + /// + /// Reverses the escaping done by escape_reference. + /// + /// # Arguments + /// + /// * `s` - The escaped reference string + /// + /// # Returns + /// + /// The unescaped string + pub fn unescape_reference(s: &str) -> String { + s.replace("\"\"", "\"").replace("''", "'") + } + + /// Format a value for display in indented Links Notation. + /// Uses quoting strategy compatible with the links-notation parser: + /// - If value contains double quotes, wrap in single quotes + /// - Otherwise, wrap in double quotes + fn format_indented_value(value: &str) -> String { + let has_single = value.contains('\''); + let has_double = value.contains('"'); + + // If contains double quotes but no single quotes, use single quotes + if has_double && !has_single { + return format!("'{}'", value); + } + + // If contains single quotes but no double quotes, use double quotes + if has_single && !has_double { + return format!("\"{}\"", value); + } + + // If contains both, use single quotes and escape internal single quotes + if has_single && has_double { + let escaped = value.replace('\'', "''"); + return format!("'{}'", escaped); + } + + // Default: use double quotes + format!("\"{}\"", value) + } + + /// Format an object in indented Links Notation format. + /// + /// This format is designed for human readability, displaying objects as: + /// + /// ```text + /// + /// "" + /// "" + /// ... 
+ /// ``` + /// + /// # Arguments + /// + /// * `id` - The object identifier (displayed on first line) + /// * `obj` - The object as key-value pairs to format + /// * `indent` - The indentation string (default: 2 spaces) + /// + /// # Returns + /// + /// Formatted indented Links Notation string, or an error + /// + /// # Example + /// + /// ```rust + /// use lino_objects_codec::format::format_indented; + /// use std::collections::HashMap; + /// + /// let mut obj = HashMap::new(); + /// obj.insert("status".to_string(), "executed".to_string()); + /// obj.insert("exitCode".to_string(), "0".to_string()); + /// + /// let result = format_indented("my-uuid", &obj, " ").unwrap(); + /// assert!(result.starts_with("my-uuid\n")); + /// ``` + pub fn format_indented( + id: &str, + obj: &HashMap, + indent: &str, + ) -> Result { + if id.is_empty() { + return Err(FormatError::MissingField("id".to_string())); + } + + let mut lines = vec![id.to_string()]; + + for (key, value) in obj { + let escaped_key = escape_reference(key); + let formatted_value = format_indented_value(value); + lines.push(format!("{}{} {}", indent, escaped_key, formatted_value)); + } + + Ok(lines.join("\n")) + } + + /// Format an object in indented Links Notation format, maintaining key order. + /// + /// This is similar to `format_indented` but takes a slice of tuples to preserve + /// the order of keys. 
+ /// + /// # Arguments + /// + /// * `id` - The object identifier (displayed on first line) + /// * `pairs` - The key-value pairs in order + /// * `indent` - The indentation string (default: 2 spaces) + /// + /// # Returns + /// + /// Formatted indented Links Notation string, or an error + pub fn format_indented_ordered( + id: &str, + pairs: &[(&str, &str)], + indent: &str, + ) -> Result { + if id.is_empty() { + return Err(FormatError::MissingField("id".to_string())); + } + + let mut lines = vec![id.to_string()]; + + for (key, value) in pairs { + let escaped_key = escape_reference(key); + let formatted_value = format_indented_value(value); + lines.push(format!("{}{} {}", indent, escaped_key, formatted_value)); + } + + Ok(lines.join("\n")) + } + + /// Parse an indented Links Notation string back to an object. + /// + /// This function uses the links-notation parser for proper parsing, + /// supporting the standard Links Notation indented syntax. + /// + /// Parses strings like: + /// + /// ```text + /// + /// "" + /// "" + /// ... 
+ /// ``` + /// + /// The format with colon after identifier is also supported (standard lino): + /// + /// ```text + /// : + /// "" + /// ``` + /// + /// # Arguments + /// + /// * `text` - The indented Links Notation string to parse + /// + /// # Returns + /// + /// A tuple of (id, HashMap of key-value pairs), or an error + /// + /// # Example + /// + /// ```rust + /// use lino_objects_codec::format::parse_indented; + /// + /// let text = "my-uuid\n status \"executed\"\n exitCode \"0\""; + /// let (id, obj) = parse_indented(text).unwrap(); + /// assert_eq!(id, "my-uuid"); + /// assert_eq!(obj.get("status"), Some(&"executed".to_string())); + /// ``` + pub fn parse_indented(text: &str) -> Result<(String, HashMap), FormatError> { + if text.is_empty() { + return Err(FormatError::InvalidInput( + "text is required for parse_indented".to_string(), + )); + } + + let lines: Vec<&str> = text.lines().collect(); + if lines.is_empty() { + return Err(FormatError::InvalidInput( + "text must have at least one line (the identifier)".to_string(), + )); + } + + // Filter out empty lines to preserve indentation structure for the parser + // Empty lines would break the indentation context in links-notation + let non_empty_lines: Vec<&str> = lines + .iter() + .filter(|l| !l.trim().is_empty()) + .copied() + .collect(); + + if non_empty_lines.is_empty() { + return Err(FormatError::InvalidInput( + "text must have at least one non-empty line (the identifier)".to_string(), + )); + } + + // Convert to standard lino format by adding colon after first line if not present + // This allows the links-notation parser to properly parse the indented structure + let first_line = non_empty_lines[0].trim(); + let lino_text = if first_line.ends_with(':') { + non_empty_lines.join("\n") + } else { + format!("{}:\n{}", first_line, non_empty_lines[1..].join("\n")) + }; + + // Use links-notation parser + let parsed = parse_lino_to_links(&lino_text) + .map_err(|e| FormatError::InvalidInput(format!("Parse 
error: {:?}", e)))?; + + if parsed.is_empty() { + return Err(FormatError::InvalidInput( + "Failed to parse indented Links Notation".to_string(), + )); + } + + // Extract id and key-value pairs from parsed result + let main_link = &parsed[0]; + let (result_id, values) = match main_link { + LiNo::Link { id, values } => (id.clone().unwrap_or_default(), values), + LiNo::Ref(id) => (id.clone(), &vec![]), + }; + + let mut obj = HashMap::new(); + + // Process the values array - each entry is a doublet (key value) + for child in values { + if let LiNo::Link { + values: child_values, + .. + } = child + { + if child_values.len() == 2 { + let key_ref = &child_values[0]; + let value_ref = &child_values[1]; + + // Get key string + let key = match key_ref { + LiNo::Ref(k) => k.clone(), + LiNo::Link { id, .. } => id.clone().unwrap_or_default(), + }; + + // Get value string + let value = match value_ref { + LiNo::Ref(v) => v.clone(), + LiNo::Link { id, .. } => id.clone().unwrap_or_default(), + }; + + obj.insert(key, value); + } + } + } + + Ok((result_id, obj)) + } +} + #[cfg(test)] mod tests { use super::*; @@ -1174,3 +1510,146 @@ mod tests { assert_eq!(decoded, original); } } + +#[cfg(test)] +mod format_tests { + use super::format::*; + use std::collections::HashMap; + + #[test] + fn test_escape_reference_simple_string() { + assert_eq!(escape_reference("hello"), "hello"); + assert_eq!(escape_reference("world"), "world"); + } + + #[test] + fn test_escape_reference_string_with_spaces() { + let result = escape_reference("hello world"); + assert!(result.starts_with('\'') || result.starts_with('"')); + assert!(result.contains("hello world")); + } + + #[test] + fn test_escape_reference_string_with_single_quotes() { + let result = escape_reference("it's"); + assert_eq!(result, "\"it's\""); + } + + #[test] + fn test_escape_reference_string_with_double_quotes() { + let result = escape_reference("he said \"hello\""); + assert_eq!(result, "'he said \"hello\"'"); + } + + #[test] + fn 
test_unescape_reference_doubled_quotes() { + assert_eq!( + unescape_reference("he said \"\"hello\"\""), + "he said \"hello\"" + ); + assert_eq!(unescape_reference("it''s"), "it's"); + } + + #[test] + fn test_format_indented_ordered_basic() { + let pairs = [ + ("uuid", "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"), + ("status", "executed"), + ("command", "echo test"), + ("exitCode", "0"), + ]; + let result = + format_indented_ordered("6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", &pairs, " ").unwrap(); + let lines: Vec<&str> = result.lines().collect(); + assert_eq!(lines[0], "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"); + assert_eq!(lines[1], " uuid \"6dcf4c1b-ff3f-482c-95ab-711ea7d1b019\""); + assert_eq!(lines[2], " status \"executed\""); + assert_eq!(lines[3], " command \"echo test\""); + assert_eq!(lines[4], " exitCode \"0\""); + } + + #[test] + fn test_format_indented_value_with_quotes() { + // Values containing double quotes are wrapped in single quotes (links-notation style) + let pairs = [("message", "He said \"hello\"")]; + let result = format_indented_ordered("test-id", &pairs, " ").unwrap(); + let lines: Vec<&str> = result.lines().collect(); + assert_eq!(lines[1], " message 'He said \"hello\"'"); + } + + #[test] + fn test_format_indented_requires_id() { + let mut obj = HashMap::new(); + obj.insert("key".to_string(), "value".to_string()); + let result = format_indented("", &obj, " "); + assert!(result.is_err()); + } + + #[test] + fn test_parse_indented_basic() { + let text = "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019\n uuid \"6dcf4c1b-ff3f-482c-95ab-711ea7d1b019\"\n status \"executed\"\n exitCode \"0\""; + let (id, obj) = parse_indented(text).unwrap(); + assert_eq!(id, "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"); + assert_eq!( + obj.get("uuid"), + Some(&"6dcf4c1b-ff3f-482c-95ab-711ea7d1b019".to_string()) + ); + assert_eq!(obj.get("status"), Some(&"executed".to_string())); + assert_eq!(obj.get("exitCode"), Some(&"0".to_string())); + } + + #[test] + fn test_parse_indented_with_quotes() 
{ + // Links-notation style: use single quotes to wrap value containing double quotes + let text = "test-id\n message 'He said \"hello\"'"; + let (id, obj) = parse_indented(text).unwrap(); + assert_eq!(id, "test-id"); + assert_eq!(obj.get("message"), Some(&"He said \"hello\"".to_string())); + } + + #[test] + fn test_parse_indented_empty_lines_skipped() { + let text = "test-id\n\n key \"value\"\n\n another \"value2\""; + let (id, obj) = parse_indented(text).unwrap(); + assert_eq!(id, "test-id"); + assert_eq!(obj.get("key"), Some(&"value".to_string())); + assert_eq!(obj.get("another"), Some(&"value2".to_string())); + } + + #[test] + fn test_parse_indented_requires_text() { + let result = parse_indented(""); + assert!(result.is_err()); + } + + #[test] + fn test_roundtrip_format_indented() { + let pairs = [ + ("uuid", "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"), + ("status", "executed"), + ("command", "echo test"), + ("exitCode", "0"), + ]; + let formatted = + format_indented_ordered("6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", &pairs, " ").unwrap(); + let (parsed_id, parsed_obj) = parse_indented(&formatted).unwrap(); + + assert_eq!(parsed_id, "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"); + for (key, value) in pairs { + assert_eq!(parsed_obj.get(key), Some(&value.to_string())); + } + } + + #[test] + fn test_roundtrip_with_quotes() { + let pairs = [("message", "He said \"hello\"")]; + let formatted = format_indented_ordered("test-id", &pairs, " ").unwrap(); + let (parsed_id, parsed_obj) = parse_indented(&formatted).unwrap(); + + assert_eq!(parsed_id, "test-id"); + assert_eq!( + parsed_obj.get("message"), + Some(&"He said \"hello\"".to_string()) + ); + } +}