diff --git a/difflib/README.md b/difflib/README.md
new file mode 100644
index 00000000..ed6f6869
--- /dev/null
+++ b/difflib/README.md
@@ -0,0 +1,62 @@
+## Introduction
+
+Module `difflib` -- helpers for computing deltas between objects.
+
+## How to Use
+
++ Add the dependency
+
+```shell
+kcl mod add difflib
+```
+
++ Write the code
+
+```python
+import difflib
+import yaml
+
+data1 = {
+    "firstName": "John",
+    "lastName": "Doe",
+    "age": 30,
+    "address": {
+        "streetAddress": "1234 Main St",
+        "city": "New York",
+        "state": "NY",
+        "postalCode": "10001"
+    },
+    "phoneNumbers": [
+        {
+            "type": "home",
+            "number": "212-555-1234"
+        },
+        {
+            "type": "work",
+            "number": "646-555-5678"
+        }
+    ]
+}
+data2 = {
+    "firstName": "John",
+    "lastName": "Doe",
+    "age": 30,
+    "address": {
+        "streetAddress": "1234 Main St",
+        "city": "New York",
+        "state": "NY",
+        "postalCode": None
+    },
+    "phoneNumbers": [
+        {
+            "type": "work",
+            "number": "646-555-5678"
+        }
+    ]
+}
+diff = difflib.diff(yaml.encode(data1), yaml.encode(data2))
+```
+
+## Resource
+
+The source code and documentation are [here](https://github.com/kcl-lang/artifacthub/tree/main/difflib)
diff --git a/difflib/kcl.mod b/difflib/kcl.mod
new file mode 100644
index 00000000..3ae18757
--- /dev/null
+++ b/difflib/kcl.mod
@@ -0,0 +1,5 @@
+[package]
+name = "difflib"
+edition = "v0.9.0"
+version = "0.1.0"
+
diff --git a/difflib/kcl.mod.lock b/difflib/kcl.mod.lock
new file mode 100644
index 00000000..e69de29b
diff --git a/difflib/main.k b/difflib/main.k
new file mode 100644
index 00000000..5bda3790
--- /dev/null
+++ b/difflib/main.k
@@ -0,0 +1,111 @@
+"""Module difflib -- helpers for computing deltas between objects.
+"""
+
+_looper_n = lambda elements: [any], n: int, func: (any, any) -> any, initial: any -> any {
+    """Fold `func` over `elements[n:]` recursively, starting from the accumulator `initial`."""
+    assert n >= 0
+    result = initial
+    if n < len(elements):
+        result = _looper_n(elements, n + 1, func, func(result, elements[n]))
+
+    result
+}
+
+looper = lambda initial: any, elements: [any], func: (any, any) -> any -> any {
+    """Left fold: apply `func(accumulator, element)` to every element in order and return the final accumulator."""
+    _looper_n(elements, 0, func, initial)
+}
+
+looper_enumerate = lambda initial: any, elements: [any] | {str:}, func: (any, str | int, any) -> any -> any {
+    """Like `looper`, but `func` also receives the list index or dict key: `func(accumulator, key, value)`."""
+    looper(initial, [{"k" = k, "v" = v} for k, v in elements], lambda initial, value {
+        func(initial, value.k, value.v)
+    })
+}
+
+for_each = lambda elements: [any], func: (any) {
+    """Call `func` on every element for its effect (e.g. assertions); the result is always Undefined."""
+    [func(i) for i in elements]
+    Undefined
+}
+
+while_loop = lambda condition: ([any]) -> bool, body: ([any]) -> [any], vals: [any] -> [any] {
+    """Do a while loop using the condition function, body function and variables with side effects that need to be modified in place, such as iteration variables, etc."""
+    vals if not condition(vals) else while_loop(condition, body, body(vals))
+}
+
+list_set_index = lambda l: [], i: int, v {
+    """Set the list `l` at index `i` with the value `v`"""
+    l = l[:i:] + [v] + l[i + 1::]
+    l
+}
+
+longest_common_subsequence = lambda a: [], b: [] -> [] {
+    """Return one longest common subsequence (LCS) of the lists `a` and `b`, computed by dynamic programming."""
+    # lengths[i][j] holds the LCS length of a[:i] and b[:j]
+    lengths = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
+    lengths = looper_enumerate(lengths, a, lambda m, i, x {
+        looper_enumerate(m, b, lambda v, j, y {
+            list_set_index(v, i + 1, list_set_index(v[i + 1], j + 1, v[i][j] + 1 if x == y else max(v[i + 1][j], v[i][j + 1])))
+        })
+    })
+    vals = [len(a), len(b), []]
+    # Walk back from the bottom-right corner of the matrix to read the subsequence out
+    while_loop(lambda vals: [any] {
+        vals[0] != 0 and vals[1] != 0
+    }, lambda vals: [any] {
+        x = vals[0]
+        y = vals[1]
+        result = vals[2]
+        if lengths[x][y] == lengths[x - 1][y]:
+            x -= 1
+        elif lengths[x][y] == lengths[x][y - 1]:
+            y -= 1
+        else:
+            assert a[x - 1] == b[y - 1], "{} != {}".format(a[x - 1], b[y - 1])
+            result = [a[x - 1]] + result
+            x -= 1
+            y -= 1
+        [x, y, result]
+    }, vals)[-1]
+}
+
+ndiff = lambda a: [str], b: [str] -> str {
+    """Compare a and b (lists of strings); return a Differ-style delta string."""
+    lcs = longest_common_subsequence(a, b)
+    # While-loop variables: [i, j, lcs, diff_str], where `lcs` is the not-yet-emitted
+    # tail of the common subsequence. It is always a subsequence of both a[i:] and
+    # b[j:], which keeps the indexing in the branches below in range.
+    vals = [0, 0, lcs, ""]
+    len_a = len(a)
+    len_b = len(b)
+    while_loop(lambda vals {
+        vals[0] < len_a or vals[1] < len_b
+    }, lambda vals {
+        i = vals[0]
+        j = vals[1]
+        lcs = vals[2]
+        diff_str = vals[3]
+        if lcs and i < len_a and j < len_b and a[i] == lcs[0] and b[j] == lcs[0]:
+            # Both sides sit on the next common line: emit it and consume it from lcs
+            diff_str += " " + a[i] + "\n"
+            lcs = lcs[1:]
+            i += 1
+            j += 1
+        elif j < len_b and (not lcs or b[j] != lcs[0]):
+            # b[j] is not the next common line, so it was added
+            diff_str += "+ " + b[j] + "\n"
+            j += 1
+        else:
+            # b is exhausted or waiting on the next common line, so a[i] was removed
+            diff_str += "- " + a[i] + "\n"
+            i += 1
+
+        [i, j, lcs, diff_str]
+    }, vals)[-1]
+}
+
+diff = lambda a: str, b: str {
+    """Compare a and b (string type); return a Differ-style delta string."""
+    ndiff(a.splitlines(), b.splitlines())
+}
diff --git a/difflib/main_test.k b/difflib/main_test.k
new file mode 100644
index 00000000..c9b6cd57
--- /dev/null
+++ b/difflib/main_test.k
@@ -0,0 +1,51 @@
+import yaml
+
+schema Suite:
+    a: str
+    b: str
+    result: str
+
+test_longest_common_subsequence = lambda {
+    cases: [Suite] = [
+        Suite {
+            a: "\n".join(["this", "is", "a", "example", "xxx"])
+            b: "\n".join(["this", "is", "an", "example", "xxx"])
+            result: """ this
+ is
++ an
+- a
+ example
+ xxx
+"""
+        }
+        Suite {
+            a: "\n".join(["a", "b", "c"])
+            b: "\n".join(["a", "c"])
+            result: """ a
+- b
+ c
+"""
+        }
+        Suite {
+            a: yaml.encode({
+    "a": 1
+    "b": 2
+})
+            b: yaml.encode({
+    "a": 1
+    "c": 1
+    "d": 3
+    "b": 2
+})
+            result: """ a: 1
++ c: 1
++ d: 3
+ b: 2
+"""
+        }
+    ]
+    for_each(cases, lambda case: Suite {
+        result = diff(case.a, case.b)
+        assert result == case.result, "expect ${case.result}, got ${result}"
+    })
+}