Skip to content

Commit

Permalink
Fixes #588, fixes #564, changes #568, relates #550 - Only filter out …
Browse files Browse the repository at this point in the history
…known problematic characters

Cannot be too aggressive, as some fonts contain Private Use Area code points, etc., and expect them to be output.
  • Loading branch information
danfickle committed Oct 30, 2020
1 parent ed2bd9c commit 9ffd0e4
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ public interface TextRenderer {
*
* @param input The string can be null
* @return The cleaned string or <code>null</code> if the input is null
* @see com.openhtmltopdf.util.OpenUtil#isCodePointPrintable(int)
* @see com.openhtmltopdf.util.OpenUtil#isSafeFontCodePointToPrint(int)
*/
static String getEffectivePrintableString(String input) {
public static String getEffectivePrintableString(String input) {
if (input == null || input.isEmpty() || areAllCharactersPrintable(input)) {
return input;
}

StringBuilder effective = new StringBuilder(input.length());
input.codePoints().filter(OpenUtil::isCodePointPrintable).forEach(effective::appendCodePoint);
input.codePoints().filter(OpenUtil::isSafeFontCodePointToPrint).forEach(effective::appendCodePoint);

return effective.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ private OpenUtil() {}
* Checks if a code point is printable. If false, it can be safely discarded at the
* rendering stage, else it should be replaced with the replacement character,
* if a suitable glyph can not be found.
*
* NOTE: This should only be called after a character has been shown to be
* NOT present in the font. It cannot be called beforehand because some fonts
* contain Private Use Area characters and so on. Issue#588.
*
* @param codePoint
* @return whether codePoint is printable
*/
Expand All @@ -26,14 +31,31 @@ public static boolean isCodePointPrintable(int codePoint) {
category == Character.SURROGATE);
}

/**
 * Decides whether a code point may be handed to the font for rendering.
 * <p>
 * Only a small deny-list of code points is rejected, namely those that
 * have been shown to be problematic in some (broken) fonts, such as a
 * soft hyphen that is drawn visibly. Every other code point is accepted.
 *
 * @param codePoint the Unicode code point to test
 * @return <code>false</code> for U+00AD and U+FFFC, <code>true</code>
 *         for any other value
 */
public static boolean isSafeFontCodePointToPrint(int codePoint) {
    // Soft hyphen, PR#550.
    final boolean softHyphen = (codePoint == 0xAD);
    // Object replacement character, Issue#564.
    final boolean objectReplacement = (codePoint == 0xFFFC);

    return !(softHyphen || objectReplacement);
}

/**
* Returns <code>true</code>, when all characters of the given string are printable.
* @param str a non-null string to test
* @return whether all characters are printable
*/
public static boolean areAllCharactersPrintable(String str) {
Objects.requireNonNull(str, "str");
return str.codePoints().allMatch(OpenUtil::isCodePointPrintable);
return str.codePoints().allMatch(OpenUtil::isSafeFontCodePointToPrint);
}

public static Integer parseIntegerOrNull(String possibleInteger) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import com.openhtmltopdf.simple.extend.ReplacedElementScaleHelper;
import com.openhtmltopdf.util.ArrayUtil;
import com.openhtmltopdf.util.LogMessageId;
import com.openhtmltopdf.util.OpenUtil;
import com.openhtmltopdf.util.XRLog;
import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2D;
import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2DFontTextDrawer;
Expand Down Expand Up @@ -74,10 +73,6 @@
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;

import static com.openhtmltopdf.util.OpenUtil.areAllCharactersPrintable;

public class PdfBoxFastOutputDevice extends AbstractOutputDevice implements OutputDevice, PdfBoxOutputDevice {
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.stream.IntStream;

import com.openhtmltopdf.bidi.BidiReorderer;
import com.openhtmltopdf.extend.FontContext;
Expand All @@ -35,11 +34,10 @@
import com.openhtmltopdf.render.FSFontMetrics;
import com.openhtmltopdf.render.JustificationInfo;
import com.openhtmltopdf.util.LogMessageId;
import com.openhtmltopdf.util.OpenUtil;
import com.openhtmltopdf.util.ThreadCtx;
import com.openhtmltopdf.util.XRLog;

import static com.openhtmltopdf.util.OpenUtil.isCodePointPrintable;

public class PdfBoxTextRenderer implements TextRenderer {
private static float TEXT_MEASURING_DELTA = 0.01f;

Expand Down Expand Up @@ -194,7 +192,9 @@ public static List<FontRun> divideIntoFontRuns(FSFont font, String str, BidiReor
i += Character.charCount(unicode);
String ch = String.valueOf(Character.toChars(unicode));

if (!isCodePointPrintable(unicode)) {
if (!OpenUtil.isSafeFontCodePointToPrint(unicode)) {
// Filter out characters that should never be visible (such
// as soft-hyphen) but are in some fonts.
continue;
}

Expand Down Expand Up @@ -264,8 +264,14 @@ else if (des != current.des) {
}
}
}

if (!gotChar) {
if (!OpenUtil.isCodePointPrintable(unicode)) {
// Filter out control, etc characters when they
// are not present in any font.
continue;
}

// We still don't have the character after all that. So use replacement character.
if (current.des == null) {
// First character of run.
Expand Down Expand Up @@ -366,6 +372,8 @@ public int getSmoothingLevel() {
return 0;
}

@Deprecated
@Override
public void setSmoothingLevel(int level) {
}

Expand Down

0 comments on commit 9ffd0e4

Please sign in to comment.