Skip to content

Commit

Permalink
Proposed fix of the problem. Additionally the logic has been changed,…
Browse files Browse the repository at this point in the history
… since the canonical approach seems to be too strict and confusing in case the formatter is also used in the IDE. To preserve previous user adaptations, the minimum edit distance is used to distinguish new changes and previous user adaptations.
  • Loading branch information
fvgh committed Apr 29, 2017
1 parent c18a511 commit 2c628a8
Show file tree
Hide file tree
Showing 8 changed files with 318 additions and 61 deletions.
22 changes: 10 additions & 12 deletions PADDEDCELL.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,29 +62,27 @@ spotless {
then it will run in the following way:

- When you call `spotlessApply`, it will automatically check for a ping-pong condition.
- If there is a ping-pong condition, it will resolve the ambiguity arbitrarily, but consistently
- If there is a ping-pong condition, it will accept the users choice
- It will also warn that `filename such-and-such cycles between 2 steps`.

## How is the ambiguity resolved?

This is easiest to show in an example:

* Two-state cycle: `'CCCC' 'A' 'B' 'A' ...`
+ `F(F('CCC'))` should equal `F('CCC')`, but it didn't, so we iterate until we find a cycle.
+ In this case, that cycle turns out to be `'B' 'A'`
+ To resolve the ambiguity about which element in the cycle is "right", Spotless uses the lowest element sorted by first by length then alphabetically.
+ In this case, `A` is the canonical form which will be used by `spotlessApply` and `spotlessCheck`.

* Four-state cycle: `'CCCC' 'A' 'B' 'C' 'D' 'A' 'B' 'C' 'D'`
* Two-state cycle: `'AA' 'Aa' 'Bb' 'Aa' 'Bb' ...`
+ `F(F('AA'))` should equal `F('AA')`, but it didn't, so we iterate until we find a cycle.
+ In this case, that cycle turns out to be `'Aa' 'Bb'`
+ Spotless will select automatically the solution of the cycle, which the minimum edit distance to the input is `'Aa'`.
+ The user can either manually adapt the code to use other options in the cycle (`'Bb'`) or apply a custom rule.
* Four-state cycle: `'CCCC' 'B' 'C' 'D' 'A' 'B' 'C' 'D' 'A'`
+ As above, we detect a cycle, and it turns out to be `'B' 'C' 'D' 'A'`
+ We resolve this cycle with the lowest element, which is `A`
+ We resolve this cycle with the minimum edit distance, which is `'C'`

* Convergence: `'ATT' 'AT' 'A' 'A'`
+ `F(F('ATT'))` did not equal `F('ATT')`, but there is no cycle.
+ Eventually, the sequence converged on `A`.
+ As a result, we will use `A` as the canoncial format.

* Divergence: `'1' '12' '123' '1234' '12345' ...`
+ This format does not cycle or converge
+ As a result, the canonical format is whatever the starting value was, which is `1` in this case.
+ PaddedCell gives up looking for a cycle or convergence and calls a sequence divergent after 10 tries.
+ This format does not cycle or converge
+ Hence the original input will not be changed
114 changes: 114 additions & 0 deletions lib/src/main/java/com/diffplug/spotless/Comparison.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright 2016 DiffPlug
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.diffplug.spotless;

import java.util.Arrays;
import java.util.Objects;

final class Comparison {

/** Interface is provided by static functions */
private Comparison() {}

/** Selects the character sequence of a list with the minimum edit distance to another sequence */
public static <T extends CharSequence> T nearest(Iterable<T> of, CharSequence to) {
T nearest = null;
int nearestDistance = Integer.MAX_VALUE;
for (T from : of) {
int distanceFrom = distance(from, to);
if (nearestDistance > distanceFrom) {
nearestDistance = distanceFrom;
nearest = from;
}
}
Objects.requireNonNull(nearest, "'of' must contain at least one element");
return nearest;
}

/** Returns the edit distance between the two sequences */
public static int distance(CharSequence seq1, CharSequence seq2) {
return ONP.compare(seq1, seq2);
}

/**
* Implementation based on "An O(NP) Sequence Comparison Algorithm",
* by Sun Wu, Udi Manber, Gene Myers.
*/
private static class ONP {
private final CharSequence a, b;
private final int m, n; //Length of a,b
private final int delta; //diagonal delta = n − m contains the sink
private final int[] fp; //furthest d-points
private final int fp_offset; //Offset to cope with negative array indexes

private ONP(CharSequence seq1, CharSequence seq2) {
//Algorithm preliminaries n>=m
if (seq1.length() < seq2.length()) {
a = seq1;
b = seq2;
} else {
a = seq2;
b = seq1;
}
m = a.length();
n = b.length();
delta = n - m;
fp = new int[m + n + 3]; //Index range is −(m+1)..(n+1)
fp_offset = m + 1; //Offset to index range
Arrays.fill(fp, -1); //fp[−(m+1)..(n+1)] := −1;
}

static int compare(CharSequence seq1, CharSequence seq2) {
Objects.requireNonNull(seq1, "seq1");
Objects.requireNonNull(seq2, "seq2");
ONP onp = new ONP(seq1, seq2);
return onp.editDistance();
}

private int editDistance() {
int p = -1;
do {
//Note that the max calculation moved to the snake method
++p; //p := p + 1;
for (int k = -p + fp_offset; k <= delta - 1 + fp_offset; ++k) { //For k := −p to delta−1
//fp[k] := snake( k, max (fp[k−1] + 1, fp[k+1]) );
fp[k] = snake(k);
}
for (int k = delta + p + fp_offset; k >= delta + 1 + fp_offset; --k) { // For k := delta + p downto delta + 1 by −1 do
//fp[k] := snake( k, max (fp[k−1] + 1, fp[k+1]) );
fp[k] = snake(k);
}
//fp[delta] := snake( delta, max (fp[delta−1] + 1, fp[delta+1]) );
fp[delta + fp_offset] = snake(delta + fp_offset);
} while (fp[delta + fp_offset] < n);
return delta + 2 * p; //The edit distance
}

/**
* Function snake( k, y: int) : int
* y calculation moved from editDistance into this method
*/
private int snake(int k) {
int y = Math.max(fp[k - 1] + 1, fp[k + 1]);
int x = y - k + fp_offset;
while (x < m && y < n && a.charAt(x) == b.charAt(y)) {
++x;
++y;
}
return y;
}
}
}
2 changes: 1 addition & 1 deletion lib/src/main/java/com/diffplug/spotless/Formatter.java
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public Formatter build() {
}
}

/** Returns true iff the given file's formatting is up-to-date. */
/** Returns true if the given file's formatting is up-to-date. */
public boolean isClean(File file) throws IOException {
Objects.requireNonNull(file);

Expand Down
88 changes: 67 additions & 21 deletions lib/src/main/java/com/diffplug/spotless/PaddedCell.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,21 @@ public enum Type {
CONVERGE, CYCLE, DIVERGE;

/** Creates a PaddedCell with the given file and steps. */
PaddedCell create(File file, List<String> steps) {
return new PaddedCell(file, this, steps);
PaddedCell create(File file, List<String> steps, String original) {
return new PaddedCell(file, this, steps, original);
}

}

private final File file;
private final Type type;
private final List<String> steps;
private final String original;

private PaddedCell(File file, Type type, List<String> steps) {
private PaddedCell(File file, Type type, List<String> steps, String original) {
this.file = Objects.requireNonNull(file, "file");
this.type = Objects.requireNonNull(type, "type");
this.original = Objects.requireNonNull(original, "original");
// defensive copy
this.steps = new ArrayList<>(steps);
requireElementsNonNull(this.steps);
Expand Down Expand Up @@ -103,52 +106,95 @@ private static PaddedCell check(Formatter formatter, File file, String original,
if (maxLength < 2) {
throw new IllegalArgumentException("maxLength must be at least 2");
}
String appliedOnce = formatter.compute(original, file);
if (appliedOnce.equals(original)) {
return Type.CONVERGE.create(file, Collections.singletonList(appliedOnce));
}

String appliedTwice = formatter.compute(appliedOnce, file);
if (appliedOnce.equals(appliedTwice)) {
return Type.CONVERGE.create(file, Collections.singletonList(appliedOnce));
List<String> appliedN = new ArrayList<>();
String input = formatter.compute(original, file);
appliedN.add(input);
if (input.equals(original)) {
return Type.CONVERGE.create(file, appliedN, original);
}

List<String> appliedN = new ArrayList<>();
appliedN.add(appliedOnce);
appliedN.add(appliedTwice);
String input = appliedTwice;
while (appliedN.size() < maxLength) {
String output = formatter.compute(input, file);
if (output.equals(input)) {
return Type.CONVERGE.create(file, appliedN);
return Type.CONVERGE.create(file, appliedN, original);
} else {
int idx = appliedN.indexOf(output);
if (idx >= 0) {
return Type.CYCLE.create(file, appliedN.subList(idx, appliedN.size()));
return Type.CYCLE.create(file, appliedN.subList(idx, appliedN.size()), original);
} else {
appliedN.add(output);
input = output;
}
}
}
return Type.DIVERGE.create(file, appliedN);
return Type.DIVERGE.create(file, appliedN, original);
}

/**
* Returns true iff the formatter misbehaved in any way
* Returns true if the formatter misbehaved in any way
* (did not converge after a single iteration).
*/
public boolean misbehaved() {
boolean isWellBehaved = type == Type.CONVERGE && steps.size() <= 1;
return !isWellBehaved;
}

/** Any result which doesn't diverge can be resolved. */
/** Returns true if the original input is final. */
public boolean isOriginalFinal() {
// @formatter:off
switch (type) {
case CONVERGE:
case CYCLE:
return steps.contains(original);
case DIVERGE:
return true; //If the result is diverging, there is nothing more the user can do
default:
throw new IllegalArgumentException("Unknown type: " + type);
}
// @formatter:on
}

/**
* Any result which doesn't diverge can be resolved.
* Use {@link #isOriginalFinal()} to determine whether the
* original file content meets the formatter rules.
*/
@Deprecated
public boolean isResolvable() {
return type != Type.DIVERGE;
}

/** Returns the "canonical" form for this particular result (only possible if isResolvable). */
/** Returns the solution on best effort basis */
public String finalResult() {
// @formatter:off
switch (type) {
case CONVERGE:
/* Take the converged solution */
return steps.get(steps.size()-1);
case CYCLE:
if (isOriginalFinal()) {
/* in case user input is part of the cycle, let the user decided */
return original;
}
/* In case the formatter converges into a cycle,
* select the minimum edit distance to preserve,
* user modifications */
return Comparison.nearest(steps, original);
case DIVERGE:
return original;
default:
throw new IllegalArgumentException("Unknown type: " + type);
}

}

/**
* Returns the "canonical" form for this particular result (only possible if isResolvable).
* The usage of the "canonical" formatted file content is discouraged, since
* it is not transparent for the user. Instead the {@link #finalResult()}
* should be used.
*/
@Deprecated
public String canonical() {
// @formatter:off
switch (type) {
Expand Down
22 changes: 7 additions & 15 deletions lib/src/main/java/com/diffplug/spotless/PaddedCellBulk.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@
* ### spotlessCheck with paddedCell on
*
* Spotless check behaves as normal, finding a list of problem files, but then passes that list
* to {@link #check(File, File, Formatter, List)}. If there were no problem files, then `paddedCell`
* is no longer necessary, so users might as well turn it off, so we give that info as a warning.
* to {@link #check(File, File, Formatter, List)}.
*/
public final class PaddedCellBulk {
private static final Logger logger = Logger.getLogger(PaddedCellBulk.class.getName());
Expand Down Expand Up @@ -145,10 +144,7 @@ public static List<File> check(File rootDir, File diagnoseDir, Formatter formatt
// dump the type of the misbehavior to console
logger.finer(" " + relative + " " + padded.userMessage());

if (!padded.isResolvable()) {
// if it's not resolvable, then there's
// no point killing the build over it
} else {
if (!padded.isOriginalFinal()) {
// if the input is resolvable, we'll use that to try again at
// determining if it's clean
paddedCellStep.set(problemFile, padded.steps().get(0));
Expand Down Expand Up @@ -211,18 +207,14 @@ public static void apply(Formatter formatter, File file) throws IOException {

// F(input) != input, so we'll do a padded check
PaddedCell cell = PaddedCell.check(formatter, file, rawUnix);
if (!cell.isResolvable()) {
// nothing we can do, but check will warn and dump out the divergence path
return;
}

// get the canonical bytes
String canonicalUnix = cell.canonical();
String canonical = formatter.computeLineEndings(canonicalUnix, file);
byte[] canonicalBytes = canonical.getBytes(formatter.getEncoding());
if (!Arrays.equals(rawBytes, canonicalBytes)) {
String solutionUnix = cell.finalResult();
String solution = formatter.computeLineEndings(solutionUnix, file);
byte[] solutionBytes = solution.getBytes(formatter.getEncoding());
if (!Arrays.equals(rawBytes, solutionBytes)) {
// and write them to disk if needed
Files.write(file.toPath(), canonicalBytes, StandardOpenOption.TRUNCATE_EXISTING);
Files.write(file.toPath(), solutionBytes, StandardOpenOption.TRUNCATE_EXISTING);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,19 @@ public void paddedCellApply() throws IOException {
Bundle converge = converge().paddedCell();
Bundle diverge = diverge().paddedCell();

cycle.apply.execute();
converge.apply.execute();
diverge.apply.execute();

assertFileContent("A", cycle.file); // cycle -> first element in cycle
assertFileContent("", converge.file); // converge -> converges
assertFileContent("CCC", diverge.file); // diverge -> no change

cycle.check.execute();
converge.check.execute();
diverge.check.execute();
for (int i = 0; i < 2; i++) {
cycle.apply.execute();
converge.apply.execute();
diverge.apply.execute();

assertFileContent("A", cycle.file); // cycle -> first element in cycle (since all have the same minimum distance)
assertFileContent("", converge.file); // converge -> converges
assertFileContent("CCC", diverge.file); // diverge -> no change

cycle.check.execute();
converge.check.execute();
diverge.check.execute();
}
}

@Test
Expand Down
Loading

0 comments on commit 2c628a8

Please sign in to comment.